Setup¶

Retrieve "GL-DPPD-7111_Mmus_Brain_CellType_GeneMarkers.csv" from this GitHub link and save it in the same directory as this notebook. The GitHub path is nasa/GeneLab_Data_Processing/scRNAseq/10X_Chromium_3prime_Data/GeneLab_CellType_GeneMarkers/GL-DPPD-7111_GeneMarker_Files.

Possible Map My Cells region keys = ["RHP", "RSP", "ACA", "PL-ILA-ORB", "AUD-TEa-PERI-ECT", "SS-GU-VISC", "MO-FRP", "PAL", "sAMY", "CTXsp", "HY", "STRv", "OLF", "LSX", "AI", "STRd", "VIS-PTLp", "VIS", "TH", "MOp", "ENT", "HIP", "P", "MB", "MY", "CB", "AUD", "SSp", "TEa-PERI-ECT"]


Use Conda

While in the folder containing this notebook: conda env create -f rapidsc.yml

Navigate back to where you want to clone the scflow repository (I recommend home) cd

Clone scflow from GitHub. git clone git@github.com:easlinger/scflow.git

Navigate to the folder where scflow is: pip install .

pip install senepy


For NVIDIA Drivers (Linux)

sudo apt update
sudo apt install -y build-essential dkms

sudo apt install -y wget
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin
sudo mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub
sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /"

sudo apt update
sudo apt install -y cuda

Imports & Display¶

In [1]:
%load_ext autoreload
%autoreload 2
%xmode plain

import os
import re
import logging
import warnings
import json
try:
    import torch
    torch.set_float32_matmul_precision("medium")
except Exception:
    pass
# try:
#     import rapids_singlecell as rsc
# except Exception:
#     rsc = None
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
from pandas.errors import PerformanceWarning
import pandas as pd
import numpy as np
import scflow

pd.set_option("display.max_rows", 500)  # or None for unlimited rows
pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 200)




class CategoricalFilter(logging.Filter):
    def filter(self, record):
        msg = record.getMessage()
        # suppress "storing ... as categorical" logs
        if "storing" in msg and "as categorical" in msg:
            return False
        # suppress DataFrame fragmentation warnings
        if "DataFrame is highly fragmented" in msg:
            return False
        return True

# Apply to root logger (or your logger of choice)
logging.getLogger().addFilter(CategoricalFilter())
warnings.simplefilter("ignore", PerformanceWarning)
logger = logging.getLogger("anndata")
Exception reporting mode: Plain
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/jaxopt/__init__.py:59: DeprecationWarning: JAXopt is no longer maintained. See https://docs.jax.dev/en/latest/ for alternatives.
  warnings.warn(
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/marshmallow/__init__.py:17: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
  __version_info__ = tuple(LooseVersion(__version__).version)
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/marshmallow/fields.py:198: RemovedInMarshmallow4Warning: Passing field metadata as a keyword arg is deprecated. Use the explicit `metadata=...` argument instead.
  warnings.warn(
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/pkg_resources/__init__.py:3146: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('sphinxcontrib')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)

Set Options (ACTIVELY SET THESE!)¶

In [2]:
# Processors to Use
n_processors = os.cpu_count() - 1  # how many processors to use

# If You Want Results Emailed
cur_file = os.path.join(os.path.abspath(""), "create_object_612.ipynb")
html_out = os.path.splitext(cur_file)[0] + ".html"
email = "elizabeth.aslinger@aya.yale.edu"
# set email to None to skip

# Set Data Sources & Species
species = "Mouse"
batches = ["OSD-612"]  # in list even if just 1, or list multiple to integrate

# Set Source Data Directory & Output Options
superdirec = "/home/easlinger/data"  # directory with original data
direcs = [os.path.join(superdirec, i) for i in batches]
# new h5ad will write to "data" sub-directory of where this notebook is
overwrite = True  # allow overwrite of files?
file_concat = os.path.join("data", f"{'_'.join(batches)}_concatenated.h5ad")
file_new = os.path.join("data", f"{'_'.join(batches)}_integrated.h5ad")

# Set Sample & Batch IDs, Plus Other Potential Sources of Batch Effects
col_group = "Group"  # age &/or space flight
col_age = "Characteristics[Age at Euthanasia]"
# col_age = "Factor Value[Age]"
col_condition = "Factor Value[Spaceflight]"
col_sample = "sample"
col_batch = "batch" if len(batches) > 1 else col_group  # group=batch if 1
# covariates_categorical = ["Comment[Euthanasia Date]"]
# covariates_continuous = ["Time"]
covariates_categorical = None
covariates_continuous = None

# Do Sub-Clustering?
# subcluster_biggest = 1  # sub-cluster biggest cluster
# subcluster_biggest = 3  # sub-cluster biggest 3 clusters
subcluster_biggest = False  # no sub-clustering
kws_cluster = dict(n_comps=50)  # cluster individual samples
# kws_cluster = None  # do not cluster individual samples
# vars_regress_out = ["Time", "pct_counts_mt",
#                     "total_counts"]  # to regress out of concatenated object
vars_regress_out = None

# Set Annotation Sources
map_my_cells_source = "WMB-10X" if species == "Mouse" else "WHB-10X" if (
    species == "Human") else None  # Map My Cells atlas source
# map_my_cells_region_keys = None
map_my_cells_region_keys = [
    "RSP", "ACA", "PL-ILA-ORB", "AUD-TEa-PERI-ECT", "SS-GU-VISC", "MO-FRP",
    "AI", "VIS-PTLp", "VIS", "MOp", "AUD", "SSp",
    "TEa-PERI-ECT"]  # regional subset for Map My Cells
map_my_cells_cell_keys = ["Isocortex"]  # pattern match: feature name column
if species == "Mouse":
    model_celltypist = "Mouse_Whole_Brain.pkl"
else:
    raise ValueError("Manually set CellTypist model for non-mouse!")
source_patterns = ["Brain", "Cortical", "cortex"]  # for ToppGene

# Make Pre-Defined Marker Dictionary
cts_superhierarchical = {
    "Neuron": ["Excitatory", "Inhibitory", "Glutamatergic", "GABAergic",
               "Dopaminergic", "Serotonergic", "Cholinergic"]
}  # if classified as Neuron + other, just keep more specific type(s)
rename_marker_based_annotation = {
    "Excitatory | Inhibitory": "Excitatory-Inhibitory",
    "Inhibitory | Inhibitory": "Excitatory-Inhibitory"
}
mks_a_priori = pd.read_csv("GL-DPPD-7111_Mmus_Brain_CellType_GeneMarkers.csv")
mks_a_priori.loc[:, "cellName"] = mks_a_priori.cellName.replace({
    "Oligodendrocyte precursor cell": "OPC"}).apply(
        lambda x: " ".join([i.capitalize() for i in re.sub(
            " cell", "", x).split(" ")])).replace({
                "Opc": "OPC"})  # capitalize cell names
mks_a_priori = mks_a_priori.set_index("cellName")["geneSymbol"]
mks_a_priori = dict(mks_a_priori.apply(lambda x: set(x.split(","))))
mks_a_priori.update({
    "Neuroepithelial": {"Nes", "Notch1", "Sox2", "Sox10", "Hes1", "Hes3"},
    "Excitatory": {"Slc17a7", "Slc30a3", "Tcf4",
                   "Slc17a6", "Slc6a1", "Baiap3",
                   "Grin1", "Grin2b", "Gls"},
    "Inhibitory": {"Gad1", "Slc6a1", "Gabbr1", "Gabbr2",
                   "Gad2", "Slc32a1", "Oprm1", "Htr2c"},
    # "Glutamatergic": {"Slc17a7", "Slc17a6", "Grin1", "Grin2b", "Gls"},
    # "GABAergic": {"Slc6a1", "Gabbr1", "Gabbr2", "Gad2", "Gad1"},
    # "Dopaminergic": {"Th", "Dat", "Foxa2", "Girk2", "Nurr1", "Lmx1b"},
    # "Serotonergic": {"Tph", "Sert", "Pet1"},
    # "Cholinergic": {"ChAT", "VAChT", "Acetylcholinesterase"}
})
markers_predefined = {**mks_a_priori}
if cts_superhierarchical is not None:
    for i in cts_superhierarchical:
        for k in [u for u in cts_superhierarchical[
                i] if u in markers_predefined]:
            markers_predefined[k] = markers_predefined[k].union(
                markers_predefined[i])
    _ = [markers_predefined.pop(i) for i in cts_superhierarchical]
mks_collapsed = {**mks_a_priori}
mks_collapsed["Neuron"] = mks_collapsed["Neuron"].union(mks_collapsed[
    "Excitatory"]).union(mks_collapsed["Inhibitory"])
_ = mks_collapsed.pop("Excitatory")
_ = mks_collapsed.pop("Inhibitory")

Load Individual Sample Data¶

In [3]:
%%time

# Create a Subdirectory of Working Directory for Data Outputs
os.makedirs("data", exist_ok=True)

# Get Metadata
metadata = [pd.read_csv(os.path.join(
    superdirec, f"{i}_metadata_{i}-ISA/s_{i}.txt"), sep=None,
                        engine="python").set_index("Source Name").rename_axis(
                            col_sample) for i in batches]  # list of metadata
for u in np.arange(len(metadata)):  # add a combined age & condition variable
    metadata[u] = metadata[u].join(metadata[u].apply(
        lambda x: x[col_condition] + str(
            " | " + str(x[col_age]) + " Weeks" if (
                col_age in metadata[u].columns and len(
                    metadata[u][col_age].unique()) > 1) else ""),
        axis=1).to_frame(col_group))  # add space flight (x age if applicable)

# Load Data
adatas, files = {}, {}
for u, d in enumerate(direcs):  # iterate directories, then samples within
    for x in [i for i in os.listdir(d) if os.path.isdir(os.path.join(d, i))]:
        ddd = [os.path.join(d, x, i) for i in os.listdir(os.path.join(d, x))]
        if len(ddd) > 1:  # ensure 1 subdirectory (for the sample) in folder
            raise ValueError(f"More than one file: {ddd}")
        sample = os.path.basename(os.path.join(d, x, ddd[0]))  # sample ID
        files[sample] = os.path.join(os.path.join(d, x, ddd[0]),
                                     "filtered_feature_bc_matrix.h5")  # file
        adatas[sample] = sc.read_10x_h5(files[sample])  # read anndata (h5)
        if col_batch:  # if a batch column specified
            adatas[sample].obs.loc[:, col_batch] = batches[u]  # batch => .obs
        adatas[sample].obs.loc[:, col_sample] = sample  # sample ID => .obs
        samp_metadata = metadata[u].loc[sample] if (
            sample in metadata[u].index.values) else metadata[u].set_index(
                "Sample Name").loc[sample]  # extract sample-specific metadata
        for v in samp_metadata.index.values:  # loop metadata => .obs columns
            adatas[sample].obs.loc[:, v] = samp_metadata.loc[v]
        adatas[sample].obs.loc[:, f"n_cells_original_{col_sample}"] = adatas[
            sample].obs.shape[0]  # original number of cells
print(files)
metadata
{'RR10_BRN_GC_WT_G5': '/home/easlinger/data/OSD-612/RR10_BRN_GC_WT_G5-20250307T234232Z-001/RR10_BRN_GC_WT_G5/filtered_feature_bc_matrix.h5', 'RR10_BRN_FLT_WT_F1': '/home/easlinger/data/OSD-612/RR10_BRN_FLT_WT_F1-20250307T231611Z-001/RR10_BRN_FLT_WT_F1/filtered_feature_bc_matrix.h5', 'RR10_BRN_GC_WT_G1': '/home/easlinger/data/OSD-612/RR10_BRN_GC_WT_G1-20250307T210200Z-001/RR10_BRN_GC_WT_G1/filtered_feature_bc_matrix.h5', 'RR10_BRN_GC_WT_G9': '/home/easlinger/data/OSD-612/RR10_BRN_GC_WT_G9-20250307T234621Z-001/RR10_BRN_GC_WT_G9/filtered_feature_bc_matrix.h5', 'RR10_BRN_FLT_WT_F3': '/home/easlinger/data/OSD-612/RR10_BRN_FLT_WT_F3-20250307T205407Z-001/RR10_BRN_FLT_WT_F3/filtered_feature_bc_matrix.h5', 'RR10_BRN_GC_WT_G3': '/home/easlinger/data/OSD-612/RR10_BRN_GC_WT_G3-20250307T235702Z-001/RR10_BRN_GC_WT_G3/filtered_feature_bc_matrix.h5', 'RR10_BRN_FLT_WT_F7': '/home/easlinger/data/OSD-612/RR10_BRN_FLT_WT_F7-20250307T235407Z-001/RR10_BRN_FLT_WT_F7/filtered_feature_bc_matrix.h5', 'RR10_BRN_FLT_WT_F9': '/home/easlinger/data/OSD-612/RR10_BRN_FLT_WT_F9-20250307T210417Z-001/RR10_BRN_FLT_WT_F9/filtered_feature_bc_matrix.h5', 'RR10_BRN_FLT_WT_F5': '/home/easlinger/data/OSD-612/RR10_BRN_FLT_WT_F5-20250307T235113Z-001/RR10_BRN_FLT_WT_F5/filtered_feature_bc_matrix.h5', 'RR10_BRN_GC_WT_G7': '/home/easlinger/data/OSD-612/RR10_BRN_GC_WT_G7-20250308T000029Z-001/RR10_BRN_GC_WT_G7/filtered_feature_bc_matrix.h5'}
CPU times: user 6.25 s, sys: 713 ms, total: 6.96 s
Wall time: 7.01 s
Out[3]:
[                    Sample Name Characteristics[Organism] Term Source REF                              Term Accession Number Characteristics[Strain] Term Source REF.1 Term Accession Number.1  \
 sample                                                                                                                                                                                           
 RR-10_FL-01  RR10_BRN_FLT_WT_F1              Mus musculus       NCBITAXON  http://purl.bioontology.org/ontology/NCBITAXON...              B6129SF2/J               OSD  https://osdr.nasa.gov/   
 RR-10_FL-03  RR10_BRN_FLT_WT_F3              Mus musculus       NCBITAXON  http://purl.bioontology.org/ontology/NCBITAXON...              B6129SF2/J               OSD  https://osdr.nasa.gov/   
 RR-10_FL-05  RR10_BRN_FLT_WT_F5              Mus musculus       NCBITAXON  http://purl.bioontology.org/ontology/NCBITAXON...              B6129SF2/J               OSD  https://osdr.nasa.gov/   
 RR-10_FL-07  RR10_BRN_FLT_WT_F7              Mus musculus       NCBITAXON  http://purl.bioontology.org/ontology/NCBITAXON...              B6129SF2/J               OSD  https://osdr.nasa.gov/   
 RR-10_FL-09  RR10_BRN_FLT_WT_F9              Mus musculus       NCBITAXON  http://purl.bioontology.org/ontology/NCBITAXON...              B6129SF2/J               OSD  https://osdr.nasa.gov/   
 RR-10_GC-01   RR10_BRN_GC_WT_G1              Mus musculus       NCBITAXON  http://purl.bioontology.org/ontology/NCBITAXON...              B6129SF2/J               OSD  https://osdr.nasa.gov/   
 RR-10_GC-03   RR10_BRN_GC_WT_G3              Mus musculus       NCBITAXON  http://purl.bioontology.org/ontology/NCBITAXON...              B6129SF2/J               OSD  https://osdr.nasa.gov/   
 RR-10_GC-05   RR10_BRN_GC_WT_G5              Mus musculus       NCBITAXON  http://purl.bioontology.org/ontology/NCBITAXON...              B6129SF2/J               OSD  https://osdr.nasa.gov/   
 RR-10_GC-07   RR10_BRN_GC_WT_G7              Mus musculus       NCBITAXON  http://purl.bioontology.org/ontology/NCBITAXON...              B6129SF2/J               OSD  https://osdr.nasa.gov/   
 RR-10_GC-09   RR10_BRN_GC_WT_G9              Mus musculus       NCBITAXON  http://purl.bioontology.org/ontology/NCBITAXON...              B6129SF2/J               OSD  https://osdr.nasa.gov/   
 
             Characteristics[Animal Source] Characteristics[Genotype] Term Source REF.2                     Term Accession Number.2 Characteristics[Sex] Term Source REF.3  \
 sample                                                                                                                                                                      
 RR-10_FL-01             Jackson Laboratory                 Wild Type              NCIT  http://purl.obolibrary.org/obo/NCIT_C62195               Female              MESH   
 RR-10_FL-03             Jackson Laboratory                 Wild Type              NCIT  http://purl.obolibrary.org/obo/NCIT_C62195               Female              MESH   
 RR-10_FL-05             Jackson Laboratory                 Wild Type              NCIT  http://purl.obolibrary.org/obo/NCIT_C62195               Female              MESH   
 RR-10_FL-07             Jackson Laboratory                 Wild Type              NCIT  http://purl.obolibrary.org/obo/NCIT_C62195               Female              MESH   
 RR-10_FL-09             Jackson Laboratory                 Wild Type              NCIT  http://purl.obolibrary.org/obo/NCIT_C62195               Female              MESH   
 RR-10_GC-01             Jackson Laboratory                 Wild Type              NCIT  http://purl.obolibrary.org/obo/NCIT_C62195               Female              MESH   
 RR-10_GC-03             Jackson Laboratory                 Wild Type              NCIT  http://purl.obolibrary.org/obo/NCIT_C62195               Female              MESH   
 RR-10_GC-05             Jackson Laboratory                 Wild Type              NCIT  http://purl.obolibrary.org/obo/NCIT_C62195               Female              MESH   
 RR-10_GC-07             Jackson Laboratory                 Wild Type              NCIT  http://purl.obolibrary.org/obo/NCIT_C62195               Female              MESH   
 RR-10_GC-09             Jackson Laboratory                 Wild Type              NCIT  http://purl.obolibrary.org/obo/NCIT_C62195               Female              MESH   
 
                                        Term Accession Number.3 Characteristics[Material Type] Term Source REF.4               Term Accession Number.4 Factor Value[Spaceflight] Term Source REF.5  \
 sample                                                                                                                                                                                              
 RR-10_FL-01  http://purl.bioontology.org/ontology/MESH/D005260       Left cerebral hemisphere               FMA  http://purl.org/sig/ont/fma/fma61819              Space Flight              MESH   
 RR-10_FL-03  http://purl.bioontology.org/ontology/MESH/D005260       Left cerebral hemisphere               FMA  http://purl.org/sig/ont/fma/fma61819              Space Flight              MESH   
 RR-10_FL-05  http://purl.bioontology.org/ontology/MESH/D005260       Left cerebral hemisphere               FMA  http://purl.org/sig/ont/fma/fma61819              Space Flight              MESH   
 RR-10_FL-07  http://purl.bioontology.org/ontology/MESH/D005260       Left cerebral hemisphere               FMA  http://purl.org/sig/ont/fma/fma61819              Space Flight              MESH   
 RR-10_FL-09  http://purl.bioontology.org/ontology/MESH/D005260       Left cerebral hemisphere               FMA  http://purl.org/sig/ont/fma/fma61819              Space Flight              MESH   
 RR-10_GC-01  http://purl.bioontology.org/ontology/MESH/D005260       Left cerebral hemisphere               FMA  http://purl.org/sig/ont/fma/fma61819            Ground Control               OSD   
 RR-10_GC-03  http://purl.bioontology.org/ontology/MESH/D005260       Left cerebral hemisphere               FMA  http://purl.org/sig/ont/fma/fma61819            Ground Control               OSD   
 RR-10_GC-05  http://purl.bioontology.org/ontology/MESH/D005260       Left cerebral hemisphere               FMA  http://purl.org/sig/ont/fma/fma61819            Ground Control               OSD   
 RR-10_GC-07  http://purl.bioontology.org/ontology/MESH/D005260       Left cerebral hemisphere               FMA  http://purl.org/sig/ont/fma/fma61819            Ground Control               OSD   
 RR-10_GC-09  http://purl.bioontology.org/ontology/MESH/D005260       Left cerebral hemisphere               FMA  http://purl.org/sig/ont/fma/fma61819            Ground Control               OSD   
 
                                        Term Accession Number.5 Characteristics[Age at Launch]  Unit Term Source REF.6                    Term Accession Number.6 Characteristics[Age at Euthanasia]  \
 sample                                                                                                                                                                                                
 RR-10_FL-01  http://purl.bioontology.org/ontology/MESH/D013026                        14 - 15  week                UO  http://purl.obolibrary.org/obo/UO_0000034                             18 -19   
 RR-10_FL-03  http://purl.bioontology.org/ontology/MESH/D013026                        14 - 15  week                UO  http://purl.obolibrary.org/obo/UO_0000034                             18 -19   
 RR-10_FL-05  http://purl.bioontology.org/ontology/MESH/D013026                        14 - 15  week                UO  http://purl.obolibrary.org/obo/UO_0000034                             18 -19   
 RR-10_FL-07  http://purl.bioontology.org/ontology/MESH/D013026                        14 - 15  week                UO  http://purl.obolibrary.org/obo/UO_0000034                             18 -19   
 RR-10_FL-09  http://purl.bioontology.org/ontology/MESH/D013026                        14 - 15  week                UO  http://purl.obolibrary.org/obo/UO_0000034                             18 -19   
 RR-10_GC-01                             https://osdr.nasa.gov/                        14 - 15  week                UO  http://purl.obolibrary.org/obo/UO_0000034                             18 -19   
 RR-10_GC-03                             https://osdr.nasa.gov/                        14 - 15  week                UO  http://purl.obolibrary.org/obo/UO_0000034                             18 -19   
 RR-10_GC-05                             https://osdr.nasa.gov/                        14 - 15  week                UO  http://purl.obolibrary.org/obo/UO_0000034                             18 -19   
 RR-10_GC-07                             https://osdr.nasa.gov/                        14 - 15  week                UO  http://purl.obolibrary.org/obo/UO_0000034                             18 -19   
 RR-10_GC-09                             https://osdr.nasa.gov/                        14 - 15  week                UO  http://purl.obolibrary.org/obo/UO_0000034                             18 -19   
 
             Unit.1 Term Source REF.7                    Term Accession Number.7      Protocol REF                          Parameter Value[habitat]  Parameter Value[duration] Unit.2  \
 sample                                                                                                                                                                                  
 RR-10_FL-01   week                UO  http://purl.obolibrary.org/obo/UO_0000034  Animal Husbandry  Rodent Flight Hardware (Transporter and Habitat)                         28    day   
 RR-10_FL-03   week                UO  http://purl.obolibrary.org/obo/UO_0000034  Animal Husbandry  Rodent Flight Hardware (Transporter and Habitat)                         28    day   
 RR-10_FL-05   week                UO  http://purl.obolibrary.org/obo/UO_0000034  Animal Husbandry  Rodent Flight Hardware (Transporter and Habitat)                         28    day   
 RR-10_FL-07   week                UO  http://purl.obolibrary.org/obo/UO_0000034  Animal Husbandry  Rodent Flight Hardware (Transporter and Habitat)                         28    day   
 RR-10_FL-09   week                UO  http://purl.obolibrary.org/obo/UO_0000034  Animal Husbandry  Rodent Flight Hardware (Transporter and Habitat)                         28    day   
 RR-10_GC-01   week                UO  http://purl.obolibrary.org/obo/UO_0000034  Animal Husbandry  Rodent Flight Hardware (Transporter and Habitat)                         28    day   
 RR-10_GC-03   week                UO  http://purl.obolibrary.org/obo/UO_0000034  Animal Husbandry  Rodent Flight Hardware (Transporter and Habitat)                         28    day   
 RR-10_GC-05   week                UO  http://purl.obolibrary.org/obo/UO_0000034  Animal Husbandry  Rodent Flight Hardware (Transporter and Habitat)                         28    day   
 RR-10_GC-07   week                UO  http://purl.obolibrary.org/obo/UO_0000034  Animal Husbandry  Rodent Flight Hardware (Transporter and Habitat)                         28    day   
 RR-10_GC-09   week                UO  http://purl.obolibrary.org/obo/UO_0000034  Animal Husbandry  Rodent Flight Hardware (Transporter and Habitat)                         28    day   
 
             Term Source REF.8                    Term Accession Number.8 Parameter Value[Enrichment material]                  Parameter Value[light cycle]  \
 sample                                                                                                                                                        
 RR-10_FL-01                UO  http://purl.obolibrary.org/obo/UO_0000033                              Cocoons  12 h light/dark cycle, lights on at 7:00 GMT   
 RR-10_FL-03                UO  http://purl.obolibrary.org/obo/UO_0000033                              Cocoons  12 h light/dark cycle, lights on at 7:00 GMT   
 RR-10_FL-05                UO  http://purl.obolibrary.org/obo/UO_0000033                              Cocoons  12 h light/dark cycle, lights on at 7:00 GMT   
 RR-10_FL-07                UO  http://purl.obolibrary.org/obo/UO_0000033                              Cocoons  12 h light/dark cycle, lights on at 7:00 GMT   
 RR-10_FL-09                UO  http://purl.obolibrary.org/obo/UO_0000033                              Cocoons  12 h light/dark cycle, lights on at 7:00 GMT   
 RR-10_GC-01                UO  http://purl.obolibrary.org/obo/UO_0000033                              Cocoons  12 h light/dark cycle, lights on at 7:00 GMT   
 RR-10_GC-03                UO  http://purl.obolibrary.org/obo/UO_0000033                              Cocoons  12 h light/dark cycle, lights on at 7:00 GMT   
 RR-10_GC-05                UO  http://purl.obolibrary.org/obo/UO_0000033                              Cocoons  12 h light/dark cycle, lights on at 7:00 GMT   
 RR-10_GC-07                UO  http://purl.obolibrary.org/obo/UO_0000033                              Cocoons  12 h light/dark cycle, lights on at 7:00 GMT   
 RR-10_GC-09                UO  http://purl.obolibrary.org/obo/UO_0000033                              Cocoons  12 h light/dark cycle, lights on at 7:00 GMT   
 
                                  Parameter Value[Diet] Parameter Value[Feeding Schedule]                 Parameter Value[Euthanasia Method] Parameter Value[Carcass Preservation Method]  \
 sample                                                                                                                                                                                     
 RR-10_FL-01  Nutrient Upgraded Rodent Food Bar (NuRFB)                        ad libitum  Bilateral thoracotomy with sedation, Ketamine/...                                  Cryochiller   
 RR-10_FL-03  Nutrient Upgraded Rodent Food Bar (NuRFB)                        ad libitum  Bilateral thoracotomy with sedation, Ketamine/...                                  Cryochiller   
 RR-10_FL-05  Nutrient Upgraded Rodent Food Bar (NuRFB)                        ad libitum  Bilateral thoracotomy with sedation, Ketamine/...                                  Cryochiller   
 RR-10_FL-07  Nutrient Upgraded Rodent Food Bar (NuRFB)                        ad libitum  Bilateral thoracotomy with sedation, Ketamine/...                                  Cryochiller   
 RR-10_FL-09  Nutrient Upgraded Rodent Food Bar (NuRFB)                        ad libitum  Bilateral thoracotomy with sedation, Ketamine/...                                  Cryochiller   
 RR-10_GC-01  Nutrient Upgraded Rodent Food Bar (NuRFB)                        ad libitum  Bilateral thoracotomy with sedation, Ketamine/...                                  Cryochiller   
 RR-10_GC-03  Nutrient Upgraded Rodent Food Bar (NuRFB)                        ad libitum  Bilateral thoracotomy with sedation, Ketamine/...                                  Cryochiller   
 RR-10_GC-05  Nutrient Upgraded Rodent Food Bar (NuRFB)                        ad libitum  Bilateral thoracotomy with sedation, Ketamine/...                                  Cryochiller   
 RR-10_GC-07  Nutrient Upgraded Rodent Food Bar (NuRFB)                        ad libitum  Bilateral thoracotomy with sedation, Ketamine/...                                  Cryochiller   
 RR-10_GC-09  Nutrient Upgraded Rodent Food Bar (NuRFB)                        ad libitum  Bilateral thoracotomy with sedation, Ketamine/...                                  Cryochiller   
 
              Parameter Value[Body Weight at Euthanasia]         Unit.3 Term Source REF.9                    Term Accession Number.9     Protocol REF.1 Parameter Value[Sample Preservation Method]  \
 sample                                                                                                                                                                                               
 RR-10_FL-01                                         NaN  Not Available               OSD                     https://osdr.nasa.gov/  sample collection                             Liquid Nitrogen   
 RR-10_FL-03                                         NaN  Not Available               OSD                     https://osdr.nasa.gov/  sample collection                             Liquid Nitrogen   
 RR-10_FL-05                                         NaN  Not Available               OSD                     https://osdr.nasa.gov/  sample collection                             Liquid Nitrogen   
 RR-10_FL-07                                         NaN  Not Available               OSD                     https://osdr.nasa.gov/  sample collection                             Liquid Nitrogen   
 RR-10_FL-09                                         NaN  Not Available               OSD                     https://osdr.nasa.gov/  sample collection                             Liquid Nitrogen   
 RR-10_GC-01                                       25.61           gram                UO  http://purl.obolibrary.org/obo/UO_0000021  sample collection                             Liquid Nitrogen   
 RR-10_GC-03                                       23.66           gram                UO  http://purl.obolibrary.org/obo/UO_0000021  sample collection                             Liquid Nitrogen   
 RR-10_GC-05                                       23.68           gram                UO  http://purl.obolibrary.org/obo/UO_0000021  sample collection                             Liquid Nitrogen   
 RR-10_GC-07                                       21.28           gram                UO  http://purl.obolibrary.org/obo/UO_0000021  sample collection                             Liquid Nitrogen   
 RR-10_GC-09                                       22.81           gram                UO  http://purl.obolibrary.org/obo/UO_0000021  sample collection                             Liquid Nitrogen   
 
             Term Source REF.10                    Term Accession Number.10  Parameter Value[Sample Storage Temperature]          Unit.4 Term Source REF.11                   Term Accession Number.11  \
 sample                                                                                                                                                                                                  
 RR-10_FL-01               NCIT  http://purl.obolibrary.org/obo/NCIT_C68796                                          -80  degree Celsius                 UO  http://purl.obolibrary.org/obo/UO_0000027   
 RR-10_FL-03               NCIT  http://purl.obolibrary.org/obo/NCIT_C68796                                          -80  degree Celsius                 UO  http://purl.obolibrary.org/obo/UO_0000027   
 RR-10_FL-05               NCIT  http://purl.obolibrary.org/obo/NCIT_C68796                                          -80  degree Celsius                 UO  http://purl.obolibrary.org/obo/UO_0000027   
 RR-10_FL-07               NCIT  http://purl.obolibrary.org/obo/NCIT_C68796                                          -80  degree Celsius                 UO  http://purl.obolibrary.org/obo/UO_0000027   
 RR-10_FL-09               NCIT  http://purl.obolibrary.org/obo/NCIT_C68796                                          -80  degree Celsius                 UO  http://purl.obolibrary.org/obo/UO_0000027   
 RR-10_GC-01               NCIT  http://purl.obolibrary.org/obo/NCIT_C68796                                          -80  degree Celsius                 UO  http://purl.obolibrary.org/obo/UO_0000027   
 RR-10_GC-03               NCIT  http://purl.obolibrary.org/obo/NCIT_C68796                                          -80  degree Celsius                 UO  http://purl.obolibrary.org/obo/UO_0000027   
 RR-10_GC-05               NCIT  http://purl.obolibrary.org/obo/NCIT_C68796                                          -80  degree Celsius                 UO  http://purl.obolibrary.org/obo/UO_0000027   
 RR-10_GC-07               NCIT  http://purl.obolibrary.org/obo/NCIT_C68796                                          -80  degree Celsius                 UO  http://purl.obolibrary.org/obo/UO_0000027   
 RR-10_GC-09               NCIT  http://purl.obolibrary.org/obo/NCIT_C68796                                          -80  degree Celsius                 UO  http://purl.obolibrary.org/obo/UO_0000027   
 
             Comment[RFID]  Comment[ALSDA Subject ID] Comment[Euthanasia Date] Comment[Euthanasia Time (hh:mm:ss)] Comment[BSP Dissection Date]                    Comment[Source Description]  \
 sample                                                                                                                                                                                          
 RR-10_FL-01    6E3B7A3143                        799              02-Jan-2021                        09:34:00 GMT                  12-May-2021  Frozen carcass dissected on Earth post-flight   
 RR-10_FL-03    6E3D711909                        799              02-Jan-2021                        10:35:00 GMT                  12-May-2021  Frozen carcass dissected on Earth post-flight   
 RR-10_FL-05    6E3D657F3D                        799              02-Jan-2021                        11:18:00 GMT                  12-May-2021  Frozen carcass dissected on Earth post-flight   
 RR-10_FL-07    6E3D710131                        799              02-Jan-2021                        13:26:00 GMT                  12-May-2021  Frozen carcass dissected on Earth post-flight   
 RR-10_FL-09    6E36781618                        799              02-Jan-2021                        14:09:00 GMT                  12-May-2021  Frozen carcass dissected on Earth post-flight   
 RR-10_GC-01    6E3B32156C                        800              05-Jan-2021                        04:34:00 EST                  14-May-2021  Frozen carcass dissected on Earth post-flight   
 RR-10_GC-03    6E37564D01                        800              05-Jan-2021                        05:34:00 EST                  14-May-2021  Frozen carcass dissected on Earth post-flight   
 RR-10_GC-05    6E3C500B24                        800              05-Jan-2021                        06:18:00 EST                  14-May-2021  Frozen carcass dissected on Earth post-flight   
 RR-10_GC-07    6E37071254                        800              05-Jan-2021                        08:26:00 EST                  14-May-2021  Frozen carcass dissected on Earth post-flight   
 RR-10_GC-09    6E3D552B5B                        800              05-Jan-2021                        09:10:00 EST                  14-May-2021  Frozen carcass dissected on Earth post-flight   
 
             Comment[Partial Body Weight on BSP Dissection Date]           Group  
 sample                                                                           
 RR-10_FL-01                                         18.98 gram     Space Flight  
 RR-10_FL-03                                         19.32 gram     Space Flight  
 RR-10_FL-05                                         19.98 gram     Space Flight  
 RR-10_FL-07                                         19.87 gram     Space Flight  
 RR-10_FL-09                                         21.45 gram     Space Flight  
 RR-10_GC-01                                      Not Available   Ground Control  
 RR-10_GC-03                                      Not Available   Ground Control  
 RR-10_GC-05                                      Not Available   Ground Control  
 RR-10_GC-07                                      Not Available   Ground Control  
 RR-10_GC-09                                      Not Available   Ground Control  ]

QC¶

Perform Sample-Specific QC¶

In [4]:
%matplotlib inline

plot_qc = False  # change to True to get sample-level QC plots (a bit slow)
qcs, n_cells_by_counts, descriptives, figs = scflow.pp.perform_qc_multi(
    adatas, col_batch=col_batch, col_sample=col_sample, plot=plot_qc,
    percentiles=[0.025, 0.10, 0.25, 0.50, 0.75, 0.85, 0.90, 0.975],
    figsize=(10, 10))  # perform QC on individual samples
for x in qcs:  # iterate QC metrics % plot percentiles by group
    fig = sns.catplot(qcs, y=x, hue=col_batch, kind="violin")
    fig.fig.suptitle(x)
    fig = sns.catplot(descriptives.loc[:, :, x][[
        i for i in descriptives if ("%" in i)]].stack().to_frame("Value"),
                      x="Metric", y="Value", kind="bar",
                      hue=col_batch, height=10)
    fig.fig.suptitle(x)
descriptives.stack().unstack("Variable").round()
Out[4]:
Variable n_cells_by_counts n_genes_by_counts pct_counts_mt total_counts
sample Group Metric
RR10_BRN_FLT_WT_F1 Space Flight count 56748.0 11879.0 11879.0 11879.0
mean 398.0 1900.0 1.0 4298.0
std 1096.0 1096.0 1.0 3711.0
min 0.0 44.0 0.0 48.0
2.5% 0.0 395.0 0.0 523.0
10% 0.0 540.0 0.0 761.0
25% 0.0 892.0 0.0 1387.0
50% 4.0 1917.0 0.0 3680.0
75% 153.0 2593.0 1.0 5865.0
85% 625.0 3003.0 1.0 7406.0
90% 1195.0 3306.0 2.0 8687.0
97.5% 3985.0 4238.0 5.0 13230.0
max 11876.0 11082.0 25.0 93231.0
RR10_BRN_FLT_WT_F3 Space Flight count 56748.0 12875.0 12875.0 12875.0
mean 384.0 1692.0 4.0 3583.0
std 1086.0 986.0 5.0 3186.0
min 0.0 9.0 0.0 9.0
2.5% 0.0 362.0 0.0 488.0
10% 0.0 532.0 0.0 783.0
25% 0.0 966.0 1.0 1546.0
50% 4.0 1576.0 2.0 2850.0
75% 144.0 2158.0 4.0 4414.0
85% 600.0 2629.0 7.0 5865.0
90% 1128.0 2976.0 10.0 7074.0
97.5% 3794.0 4068.0 20.0 12019.0
max 12866.0 9150.0 47.0 46452.0
RR10_BRN_FLT_WT_F5 Space Flight count 56748.0 3307.0 3307.0 3307.0
mean 94.0 1620.0 2.0 3400.0
std 264.0 960.0 2.0 2795.0
min 0.0 64.0 0.0 98.0
2.5% 0.0 313.0 0.0 423.0
10% 0.0 429.0 0.0 601.0
25% 0.0 678.0 0.0 1017.0
50% 1.0 1694.0 1.0 3107.0
75% 36.0 2235.0 2.0 4674.0
85% 149.0 2581.0 3.0 5749.0
90% 282.0 2835.0 3.0 6681.0
97.5% 923.0 3718.0 7.0 10531.0
max 3306.0 5592.0 46.0 22030.0
RR10_BRN_FLT_WT_F7 Space Flight count 56748.0 5075.0 5075.0 5075.0
mean 125.0 1401.0 2.0 2880.0
std 358.0 988.0 4.0 3042.0
min 0.0 17.0 0.0 17.0
2.5% 0.0 211.0 0.0 265.0
10% 0.0 388.0 0.0 520.0
25% 0.0 548.0 0.0 798.0
50% 1.0 1294.0 1.0 2140.0
75% 47.0 1972.0 2.0 3890.0
85% 193.0 2332.0 4.0 4921.0
90% 366.0 2640.0 6.0 6102.0
97.5% 1224.0 3747.0 14.0 10972.0
max 5049.0 10616.0 41.0 75161.0
RR10_BRN_FLT_WT_F9 Space Flight count 56748.0 3562.0 3562.0 3562.0
mean 103.0 1641.0 3.0 3590.0
std 294.0 915.0 4.0 2933.0
min 0.0 52.0 0.0 76.0
2.5% 0.0 360.0 0.0 499.0
10% 0.0 494.0 0.0 725.0
25% 0.0 814.0 1.0 1315.0
50% 1.0 1669.0 1.0 3163.0
75% 38.0 2190.0 3.0 4717.0
85% 156.0 2489.0 5.0 5791.0
90% 295.0 2757.0 6.0 6753.0
97.5% 1030.0 3788.0 14.0 11845.0
max 3559.0 5328.0 46.0 23456.0
RR10_BRN_GC_WT_G1 Ground Control count 56748.0 7937.0 7937.0 7937.0
mean 241.0 1721.0 2.0 3588.0
std 674.0 1005.0 3.0 3127.0
min 0.0 18.0 0.0 24.0
2.5% 0.0 389.0 0.0 513.0
10% 0.0 528.0 0.0 750.0
25% 0.0 860.0 0.0 1330.0
50% 3.0 1668.0 1.0 2988.0
75% 93.0 2277.0 2.0 4711.0
85% 376.0 2679.0 3.0 6023.0
90% 710.0 2980.0 5.0 7061.0
97.5% 2367.0 4031.0 11.0 11378.0
max 7933.0 11210.0 38.0 65469.0
RR10_BRN_GC_WT_G3 Ground Control count 56748.0 15280.0 15280.0 15280.0
mean 378.0 1402.0 3.0 2801.0
std 1129.0 810.0 3.0 2375.0
min 0.0 14.0 0.0 16.0
2.5% 0.0 292.0 0.0 387.0
10% 0.0 422.0 0.0 597.0
25% 0.0 778.0 1.0 1195.0
50% 4.0 1343.0 1.0 2359.0
75% 134.0 1800.0 3.0 3530.0
85% 551.0 2162.0 4.0 4530.0
90% 1046.0 2458.0 6.0 5488.0
97.5% 3708.0 3313.0 13.0 8888.0
max 15274.0 9847.0 41.0 50964.0
RR10_BRN_GC_WT_G5 Ground Control count 56748.0 4231.0 4231.0 4231.0
mean 123.0 1651.0 2.0 3511.0
std 351.0 914.0 3.0 2765.0
min 0.0 72.0 0.0 87.0
2.5% 0.0 336.0 0.0 451.0
10% 0.0 477.0 0.0 693.0
25% 0.0 818.0 0.0 1272.0
50% 1.0 1678.0 1.0 3106.0
75% 45.0 2254.0 2.0 4864.0
85% 187.0 2590.0 4.0 5986.0
90% 356.0 2837.0 5.0 6887.0
97.5% 1222.0 3541.0 12.0 10013.0
max 4228.0 8837.0 50.0 48219.0
RR10_BRN_GC_WT_G7 Ground Control count 56748.0 3926.0 3926.0 3926.0
mean 110.0 1596.0 2.0 3352.0
std 316.0 852.0 4.0 2616.0
min 0.0 37.0 0.0 39.0
2.5% 0.0 331.0 0.0 438.0
10% 0.0 483.0 0.0 703.0
25% 0.0 884.0 0.0 1422.0
50% 1.0 1642.0 1.0 3086.0
75% 43.0 2086.0 2.0 4410.0
85% 169.0 2345.0 4.0 5198.0
90% 318.0 2578.0 5.0 5919.0
97.5% 1080.0 3486.0 14.0 9604.0
max 3922.0 7450.0 63.0 34684.0
RR10_BRN_GC_WT_G9 Ground Control count 56748.0 8496.0 8496.0 8496.0
mean 181.0 1209.0 3.0 2292.0
std 551.0 783.0 4.0 2060.0
min 0.0 9.0 0.0 9.0
2.5% 0.0 239.0 0.0 300.0
10% 0.0 324.0 0.0 431.0
25% 0.0 510.0 0.0 719.0
50% 2.0 1164.0 1.0 1898.0
75% 61.0 1679.0 3.0 3140.0
85% 261.0 1977.0 5.0 3956.0
90% 497.0 2216.0 7.0 4646.0
97.5% 1766.0 2981.0 15.0 7469.0
max 8489.0 8067.0 57.0 35718.0
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Auto-Detect Filtering Thresholds¶

Use 2.5th and/& 97.5th percentile (sample-specific) as minimum genes per cell and minimum and maximum total counts (subject to specified absolute minima). Use 97.5th percentile as upper bound for percent mitochondrial count.

Use an absolute minimum cells per gene.

Also include arguments to run a PCA on individual samples before integrating.

To preview the effect of setting different absolute minima/maxima, use code such as the following to see how much data may be dropped:

descriptives.stack().unstack("Variable")["n_genes_by_counts"].loc[:, :, "85%", :]
descriptives.stack().unstack("Variable")["pct_mt"].loc[:, :, "85%", :]
descriptives.stack().unstack("Variable")["total_counts"].loc[:, :, "90%", :]
descriptives.stack().unstack("Variable")["total_counts"].loc[:, :, "10%", :]
descriptives.stack().unstack("Variable")["n_cells_by_counts"].loc[:, :, "10%", :]
In [5]:
# Options
bounds = descriptives[["2.5%", "97.5%"]].apply(lambda x: list(
    x), axis=1).unstack("Variable")  # list top/bottom 5% (~sample, variable)
abs_min_cells = 3  # regardless of %ile, minimum cells to retain gene
abs_min_genes = 200  # regardless of %ile, minimum genes to retain cell
abs_min_count = 300  # regardless of %ile, minimum counts to retain cell
abs_max_mt = 5  # regardless of %ile, absolute maximum mitochonrial content
n_top_genes = 2000  # number of top genes to count as HVGs

# Set Thresholds
kws_pp = {}
for x in adatas:
    b_x = bounds.loc[x]
    b_counts =  b_x["total_counts"] if isinstance(b_x[
            "total_counts"], list) else b_x["total_counts"].iloc[0]
    b_counts = [max(b_counts[0], abs_min_count), b_counts[1]]
    kws_pp[x] = {
        "min_max_genes": [round(i) if i else i for i in [max((b_x[
            "n_genes_by_counts"] if isinstance(b_x[
                "n_genes_by_counts"], list) else b_x[
                    "n_genes_by_counts"].iloc[0])[0], abs_min_genes), None]],
        "min_max_cells": [round(abs_min_cells), None],
        # "min_max_cells": [max((b_x["n_cells_by_counts"] if isinstance(
        #     b_x["n_genes_by_counts"], list) else b_x[
        #         "n_genes_by_counts"].iloc[0])[0], abs_min_cells), None],
        # "max_mt": min(abs_max_mt, (b_x["pct_counts_mt"] if isinstance(b_x[
        #     "pct_counts_mt"], list) else b_x["pct_counts_mt"].iloc[0])[1]),
        "max_mt": abs_max_mt,
        "min_max_counts": [round(i) if i else i for i in b_counts],
        # "vars_regress_out": ["total_counts"],
        "target_sum": 1e4,
        "zero_center": True, "max_value": 10,  # scaling
        "n_top_genes": n_top_genes,
        "doublet_detection": "drop"
    }
print("\n".join([f"{s}: {kws_pp[s]}" for s in kws_pp]))
pd.DataFrame(kws_pp).T
RR10_BRN_GC_WT_G5: {'min_max_genes': [336, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [451, 10013], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RR10_BRN_FLT_WT_F1: {'min_max_genes': [395, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [523, 13230], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RR10_BRN_GC_WT_G1: {'min_max_genes': [389, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [513, 11378], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RR10_BRN_GC_WT_G9: {'min_max_genes': [239, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [300, 7469], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RR10_BRN_FLT_WT_F3: {'min_max_genes': [362, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [488, 12019], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RR10_BRN_GC_WT_G3: {'min_max_genes': [292, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [387, 8888], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RR10_BRN_FLT_WT_F7: {'min_max_genes': [211, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [300, 10972], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RR10_BRN_FLT_WT_F9: {'min_max_genes': [360, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [499, 11845], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RR10_BRN_FLT_WT_F5: {'min_max_genes': [313, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [423, 10531], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RR10_BRN_GC_WT_G7: {'min_max_genes': [331, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [438, 9604], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
Out[5]:
min_max_genes min_max_cells max_mt min_max_counts target_sum zero_center max_value n_top_genes doublet_detection
RR10_BRN_GC_WT_G5 [336, None] [3, None] 5 [451, 10013] 10000.0 True 10 2000 drop
RR10_BRN_FLT_WT_F1 [395, None] [3, None] 5 [523, 13230] 10000.0 True 10 2000 drop
RR10_BRN_GC_WT_G1 [389, None] [3, None] 5 [513, 11378] 10000.0 True 10 2000 drop
RR10_BRN_GC_WT_G9 [239, None] [3, None] 5 [300, 7469] 10000.0 True 10 2000 drop
RR10_BRN_FLT_WT_F3 [362, None] [3, None] 5 [488, 12019] 10000.0 True 10 2000 drop
RR10_BRN_GC_WT_G3 [292, None] [3, None] 5 [387, 8888] 10000.0 True 10 2000 drop
RR10_BRN_FLT_WT_F7 [211, None] [3, None] 5 [300, 10972] 10000.0 True 10 2000 drop
RR10_BRN_FLT_WT_F9 [360, None] [3, None] 5 [499, 11845] 10000.0 True 10 2000 drop
RR10_BRN_FLT_WT_F5 [313, None] [3, None] 5 [423, 10531] 10000.0 True 10 2000 drop
RR10_BRN_GC_WT_G7 [331, None] [3, None] 5 [438, 9604] 10000.0 True 10 2000 drop

Integrate¶

In-Memory Approach¶

See the "On-Disk Approach" for a more sophisticated approach to defining kws_integrate (more options).

In [6]:
# %%time

# # Integrate
# join_method = "outer"  # or "inner"
# kws_integrate = {"kws_pp": kws_pp, "n_top_genes": 10000,
#                  "fill_value": np.nan,
#                  "col_batch": None,  # suppress using batch as covariate
#                  "join": join_method, "merge": "unique", "use_rapids": True}
# self = scflow.Rna(adatas, col_sample=col_sample, col_batch=col_batch,
#                   kws_integrate=kws_integrate)
# self.rna.obs = self.rna.obs.assign(kws_integrate=str(kws_integrate))

# # Write Files for Processed/Integrated Objects?
# if overwrite is True or not os.path.exists(file_new):
#     os.makedirs("data", exist_ok=True)
#     self.rna.write_h5ad(file_new)
# for x in adatas:
#     pfp = os.path.join("data", f"{x}_processed.h5ad")
#     if overwrite is True or not os.path.exists(pfp):
#         adatas[x].write_h5ad(pfp)
# del adatas  # to save memory; now integrated in `self.rna`

# # Display
# print(self.rna)
# # print(self.rna.var.head())
# self.rna.obs

On-Disk Approach¶

Preprocess Individual¶

Filter & Normalize¶

In [7]:
# Preprocess
if overwrite is False and os.path.exists(pfp):
    raise ValueError("Must be able to overwrite to use on-disk option")
files_individual = dict(zip(files.keys(), [os.path.join(
    "data", f"{x}_processed.h5ad") for x in files]))  # new individual files
var_names = []  # to store genes not filtered out for each sample
for x in files:  # iterate sample files
    print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}")
    adatas[x].obs.loc[:, f"kws_pp_{col_sample}"] = str(kws_pp[x])  # store kws
    adatas[x] = scflow.pp.preprocess(
        adatas[x], **kws_pp[x], plot_qc=False)  # preprocess data
    var_names += [set(adatas[x].var_names)]  # track what genes still present

# Decide Join Method
shared_genes = set.intersection(*var_names)  # genes in all after filtering
all_genes = set.union(*var_names)  # genes in any post-filter sample
print(f"{len(shared_genes)} genes present in all samples post-filtering (out"
      f" of {len(all_genes)} total genes present in any post-filter sample)")
join_method = "inner" if len(shared_genes) / len(all_genes) >= 0.5 else \
    "outer"  # outer join if <1/2 of genes shared across all samples

# Show Number of Cells Pre- & Post-Filtering
n_cells = pd.concat([pd.Series([
    adatas[x].obs.shape[0] for x in adatas], index=adatas).to_frame(
        "N Cells"), pd.Series([adatas[x].obs["n_cells_original_sample"].iloc[
            0] for x in adatas], index=adatas).to_frame(
                "N Cells Original")], axis=1)
n_cells = n_cells.assign(Percent_Dropped=100 * (1 - n_cells[
    "N Cells"] / n_cells["N Cells Original"]))
n_cells

================================================================================
RR10_BRN_GC_WT_G5
================================================================================
	***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 4231 × 56748
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
    layers: 'counts'
	***Filtering cells by counts...
	***Filtering cells by genes...
filtered out 12 cells that have less than 336 genes expressed
	***Filtering genes by cells...
filtered out 31450 genes that are detected in less than 3 cells
	***Filtering cells by mitochondrial gene content...
	***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.41
Detected doublet rate = 0.4%
Estimated detectable doublet fraction = 12.3%
Overall doublet rate:
	Expected   = 5.0%
	Estimated  = 3.4%
	***Normalizing...
	***Detecting highly variable genes...
	***Scaling data...


================================================================================
RR10_BRN_FLT_WT_F1
================================================================================
	***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 11879 × 56748
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
    layers: 'counts'
	***Filtering cells by counts...
	***Filtering cells by genes...
filtered out 37 cells that have less than 395 genes expressed
	***Filtering genes by cells...
filtered out 26568 genes that are detected in less than 3 cells
	***Filtering cells by mitochondrial gene content...
	***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.12
Detected doublet rate = 10.1%
Estimated detectable doublet fraction = 65.8%
Overall doublet rate:
	Expected   = 5.0%
	Estimated  = 15.4%
	***Normalizing...
	***Detecting highly variable genes...
	***Scaling data...


================================================================================
RR10_BRN_GC_WT_G1
================================================================================
	***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 7937 × 56748
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
    layers: 'counts'
	***Filtering cells by counts...
	***Filtering cells by genes...
filtered out 15 cells that have less than 389 genes expressed
	***Filtering genes by cells...
filtered out 28263 genes that are detected in less than 3 cells
	***Filtering cells by mitochondrial gene content...
	***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.49
Detected doublet rate = 0.0%
Estimated detectable doublet fraction = 0.2%
Overall doublet rate:
	Expected   = 5.0%
	Estimated  = 8.7%
	***Normalizing...
	***Detecting highly variable genes...
	***Scaling data...


================================================================================
RR10_BRN_GC_WT_G9
================================================================================
	***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 8496 × 56748
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
    layers: 'counts'
	***Filtering cells by counts...
	***Filtering cells by genes...
filtered out 26 cells that have less than 239 genes expressed
	***Filtering genes by cells...
filtered out 30148 genes that are detected in less than 3 cells
	***Filtering cells by mitochondrial gene content...
	***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.49
Detected doublet rate = 0.0%
Estimated detectable doublet fraction = 0.7%
Overall doublet rate:
	Expected   = 5.0%
	Estimated  = 6.5%
	***Normalizing...
	***Detecting highly variable genes...
	***Scaling data...


================================================================================
RR10_BRN_FLT_WT_F3
================================================================================
	***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 12875 × 56748
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
    layers: 'counts'
	***Filtering cells by counts...
	***Filtering cells by genes...
filtered out 35 cells that have less than 362 genes expressed
	***Filtering genes by cells...
filtered out 26867 genes that are detected in less than 3 cells
	***Filtering cells by mitochondrial gene content...
	***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.46
Detected doublet rate = 0.0%
Estimated detectable doublet fraction = 0.1%
Overall doublet rate:
	Expected   = 5.0%
	Estimated  = 0.0%
	***Normalizing...
	***Detecting highly variable genes...
	***Scaling data...


================================================================================
RR10_BRN_GC_WT_G3
================================================================================
	***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 15280 × 56748
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
    layers: 'counts'
	***Filtering cells by counts...
	***Filtering cells by genes...
filtered out 32 cells that have less than 292 genes expressed
	***Filtering genes by cells...
filtered out 26677 genes that are detected in less than 3 cells
	***Filtering cells by mitochondrial gene content...
	***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.45
Detected doublet rate = 0.0%
Estimated detectable doublet fraction = 0.0%
Overall doublet rate:
	Expected   = 5.0%
	Estimated  = 44.4%
	***Normalizing...
	***Detecting highly variable genes...
	***Scaling data...


================================================================================
RR10_BRN_FLT_WT_F7
================================================================================
	***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 5075 × 56748
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
    layers: 'counts'
	***Filtering cells by counts...
	***Filtering cells by genes...
filtered out 1 cells that have less than 211 genes expressed
	***Filtering genes by cells...
filtered out 31379 genes that are detected in less than 3 cells
	***Filtering cells by mitochondrial gene content...
	***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.14
Detected doublet rate = 6.8%
Estimated detectable doublet fraction = 55.3%
Overall doublet rate:
	Expected   = 5.0%
	Estimated  = 12.3%
	***Normalizing...
	***Detecting highly variable genes...
	***Scaling data...


================================================================================
RR10_BRN_FLT_WT_F9
================================================================================
	***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 3562 × 56748
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
    layers: 'counts'
	***Filtering cells by counts...
	***Filtering cells by genes...
filtered out 9 cells that have less than 360 genes expressed
	***Filtering genes by cells...
filtered out 31890 genes that are detected in less than 3 cells
	***Filtering cells by mitochondrial gene content...
	***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.38
Detected doublet rate = 0.4%
Estimated detectable doublet fraction = 9.7%
Overall doublet rate:
	Expected   = 5.0%
	Estimated  = 4.2%
	***Normalizing...
	***Detecting highly variable genes...
	***Scaling data...


================================================================================
RR10_BRN_FLT_WT_F5
================================================================================
	***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 3307 × 56748
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
    layers: 'counts'
	***Filtering cells by counts...
	***Filtering cells by genes...
filtered out 7 cells that have less than 313 genes expressed
	***Filtering genes by cells...
filtered out 32229 genes that are detected in less than 3 cells
	***Filtering cells by mitochondrial gene content...
	***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.18
Detected doublet rate = 4.6%
Estimated detectable doublet fraction = 60.8%
Overall doublet rate:
	Expected   = 5.0%
	Estimated  = 7.5%
	***Normalizing...
	***Detecting highly variable genes...
	***Scaling data...


================================================================================
RR10_BRN_GC_WT_G7
================================================================================
	***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 3926 × 56748
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
    layers: 'counts'
	***Filtering cells by counts...
	***Filtering cells by genes...
filtered out 8 cells that have less than 331 genes expressed
	***Filtering genes by cells...
filtered out 31180 genes that are detected in less than 3 cells
	***Filtering cells by mitochondrial gene content...
	***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.14
Detected doublet rate = 7.8%
Estimated detectable doublet fraction = 65.3%
Overall doublet rate:
	Expected   = 5.0%
	Estimated  = 12.0%
	***Normalizing...
	***Detecting highly variable genes...
	***Scaling data...
21692 genes present in all samples post-filtering (out of 33273 total genes present in any post-filter sample)
Out[7]:
N Cells N Cells Original Percent_Dropped
RR10_BRN_GC_WT_G5 3609 4231 14.701016
RR10_BRN_FLT_WT_F1 9930 11879 16.407105
RR10_BRN_GC_WT_G1 6826 7937 13.997732
RR10_BRN_GC_WT_G9 6823 8496 19.691620
RR10_BRN_FLT_WT_F3 9550 12875 25.825243
RR10_BRN_GC_WT_G3 12739 15280 16.629581
RR10_BRN_FLT_WT_F7 3994 5075 21.300493
RR10_BRN_FLT_WT_F9 2924 3562 17.911286
RR10_BRN_FLT_WT_F5 2833 3307 14.333233
RR10_BRN_GC_WT_G7 3098 3926 21.090168

Test Preprocessing¶

Unit tests

In [8]:
for p, ann in zip([kws_pp[x] for x in kws_pp], [adatas[x] for x in adatas]):
    print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}\n")
    if join_method == "inner":
        assert all(ann.var["n_cells_by_counts"] >= p["min_max_cells"][0])
        assert all(ann.var["n_cells_by_counts"] <= p[
            x]["min_max_cells"][1]) if kws_pp[x]["min_max_cells"][1] else True
    assert all(ann.obs["n_genes_by_counts"] >= p["min_max_genes"][0])
    assert all(ann.obs["n_genes_by_counts"] <= p[
        "min_max_genes"][1]) if p["min_max_genes"][1] else True
    assert all(ann.obs["pct_counts_mt"] <= p["max_mt"])
    assert all(ann.obs["total_counts"] >= p["min_max_counts"][0])
    assert all(ann.obs["total_counts"] <= p["min_max_counts"][1]) if (
        p["min_max_counts"][1]) else True
    print(p)
    print(ann.obs[["n_genes", "pct_counts_mt", "total_counts"]
                  ].describe().loc[["min", "max"]])
    print(ann.var[["n_cells_by_counts"]].describe().loc[[
        "min", "max"]])

================================================================================
RR10_BRN_GC_WT_G7
================================================================================

{'min_max_genes': [336, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [451, 10013], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
     n_genes  pct_counts_mt  total_counts
min    337.0            0.0         455.0
max   3924.0            5.0       10012.0
     n_cells_by_counts
min                3.0
max             4228.0


================================================================================
RR10_BRN_GC_WT_G7
================================================================================

{'min_max_genes': [395, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [523, 13230], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
     n_genes  pct_counts_mt  total_counts
min    395.0       0.000000         523.0
max   4749.0       4.987212       13230.0
     n_cells_by_counts
min                3.0
max            11876.0


================================================================================
RR10_BRN_GC_WT_G7
================================================================================

{'min_max_genes': [389, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [513, 11378], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
     n_genes  pct_counts_mt  total_counts
min    389.0            0.0         513.0
max   4637.0            5.0       11375.0
     n_cells_by_counts
min                3.0
max             7933.0


================================================================================
RR10_BRN_GC_WT_G7
================================================================================

{'min_max_genes': [239, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [300, 7469], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
     n_genes  pct_counts_mt  total_counts
min    239.0            0.0         300.0
max   3721.0            5.0        7469.0
     n_cells_by_counts
min                3.0
max             8489.0


================================================================================
RR10_BRN_GC_WT_G7
================================================================================

{'min_max_genes': [362, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [488, 12019], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
     n_genes  pct_counts_mt  total_counts
min    362.0       0.000000         488.0
max   4813.0       4.997128       12018.0
     n_cells_by_counts
min                3.0
max            12866.0


================================================================================
RR10_BRN_GC_WT_G7
================================================================================

{'min_max_genes': [292, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [387, 8888], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
     n_genes  pct_counts_mt  total_counts
min    292.0            0.0         387.0
max   3948.0            5.0        8888.0
     n_cells_by_counts
min                3.0
max            15274.0


================================================================================
RR10_BRN_GC_WT_G7
================================================================================

{'min_max_genes': [211, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [300, 10972], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
     n_genes  pct_counts_mt  total_counts
min    230.0       0.000000         301.0
max   4462.0       4.992658       10957.0
     n_cells_by_counts
min                3.0
max             5049.0


================================================================================
RR10_BRN_GC_WT_G7
================================================================================

{'min_max_genes': [360, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [499, 11845], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
     n_genes  pct_counts_mt  total_counts
min    360.0            0.0         502.0
max   4342.0            5.0       11845.0
     n_cells_by_counts
min                3.0
max             3559.0


================================================================================
RR10_BRN_GC_WT_G7
================================================================================

{'min_max_genes': [313, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [423, 10531], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
     n_genes  pct_counts_mt  total_counts
min    314.0       0.000000         425.0
max   4113.0       4.979253       10529.0
     n_cells_by_counts
min                3.0
max             3306.0


================================================================================
RR10_BRN_GC_WT_G7
================================================================================

{'min_max_genes': [331, None], 'min_max_cells': [3, None], 'max_mt': 5, 'min_max_counts': [438, 9604], 'target_sum': 10000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
     n_genes  pct_counts_mt  total_counts
min    331.0       0.000000         438.0
max   3620.0       4.991394        9531.0
     n_cells_by_counts
min                3.0
max             3922.0

Cluster Individual¶

A Priori Clustering Parameters¶

In [9]:
# %%time

# # Clustering Parameter Options (Only Applied If `kws_cluster` != None)
# resolution_individual, min_dist_individual = 0.15, 1.5
# col_cluster_individual = "annotation_by_markers_individual"
# unlabeled_cat = "Heterogeneous"  # if can't find one best-fit cell label
# cci_scanvi = col_cluster_individual + "_heterogeneous_collapsed"
# sep = " | "  # separator for heterogeneous annotations

# # Preprocessing & (Optionally) Clustering
# if overwrite is False and os.path.exists(pfp):
#     raise ValueError("Must be able to overwrite to use on-disk option")
# # del adatas
# for x in files:  # iterate sample files
#     if kws_cluster is not None:  # cluster individual sample?
#         kws_cl = {"resolution": resolution_individual[x],
#                   "min_dist": min_dist_individual[x], **kws_cluster}
#         self.rna.obs.loc[:, "kws_cluster_individual"] = str(kws_cl)
#         adatas[x] = scflow.pp.cluster(
#             adatas[x], resolution=resolution_individual,
#             min_dist=min_dist_individual, plot=False,
#             col_celltype="leiden_individual", **kws_cl)  # cluster
#         sc.tl.rank_genes_groups(
#             adatas[x], "leiden_individual", n_genes=None, rankby_abs=False,
#             key_added="rank_genes_groups_leiden_individual",
#             copy=False)  # find markers/DEGs
#         if markers_predefined is not None:  # markers specified?
#             _, adatas[x] = scflow.pp.annotate_by_marker_overlap(
#                 adatas[x], markers_predefined,
#                 col_celltype="leiden_individual",
#                 col_celltype_new=col_cluster_individual, sep=sep,
#                 celltypes_superhierarchical=cts_superhierarchical,
#                 # top_n_markers=20,
#                 adj_pval_threshold=1e-10,
#                 method="overlap_coef", inplace=True)  # annotate by markers
#             if rename_marker_based_annotation is not None:
#                 adatas[x].obs.loc[:, col_cluster_individual] = adatas[
#                     x].obs[col_cluster_individual].replace(
#                         rename_marker_based_annotation)  # re-name
#             adatas[x].obs.loc[:, cci_scanvi] = adatas[x].obs[
#                 col_cluster_individual].apply(lambda x: unlabeled_cat if (
#                     sep in x) else x)  # re-label heterogeneous annotations
#     var_names += [set(adatas[x].var_names)]  # track what genes still there

# # Write Objects
# for x in adatas:
#     print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}\n\n{adatas[x]}\n")
#     adatas[x].write_h5ad(files_individual[x])  # write individual file

# # Plot
# if kws_cluster is not None:
#     for x in files:  # iterate sample files
#         ccs = [v for v in ["leiden_individual", col_cluster_individual
#                            ] if v in adatas[x].obs]  # columns for UMAP
#         print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}")
#         sc.pl.umap(adatas[x], color=ccs, wspace=0.4)  # plot UMAP
# del adatas  # save memory

Optimize Hyperparameters¶

Iterate different clustering parameters to ensure extraction of common cell types

Make sample-specific alterations to one sample's annotation

In [10]:
# Marker Dictionary for Annotation + Required & Prohibited Cell Types
use_mks = {**markers_predefined}
# use_mks = {**mks_collapsed}
cts_nonneuron = ["OPC", "Oligodendrocyte", "Astrocyte", "Microglial"]
req_cts = list(["Neuron"] if "Neuron" in use_mks else [
    "Excitatory", "Inhibitory"] ) + cts_nonneuron  # required cell types
prohib_cts = ["Excitatory-Inhibitory"]  # prohibited cell types

# Clustering Parameters
resn_list = [0.2, 0.5, 0.1, 0.075, 0.15, 0.3, 0.25, 0.4]
dist_list = [1.5, 0.75, 0.5, 1, 0.3, 0.4]

# Column Names  & Heterogeneous Cell Type Key/Separator
col_cluster_individual = "annotation_by_markers_individual"
unlabeled_cat = "Heterogeneous"  # if can't find one best-fit cell label
cci_scanvi = col_cluster_individual + "_heterogeneous_collapsed"
sep = " | "  # separator for heterogeneous annotations

# Clustering
for x in files:  # iterate sample files
    valid_cts = False
    print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}")
    for r in resn_list:
        if valid_cts is True:
            break
        for t in dist_list:
            if valid_cts is True:
                break
            c_i = f"leiden_individual_res{r}dist{t}"
            cai = f"{col_cluster_individual}_res{r}dist{t}"
            kws_cl = {"resolution": r, "min_dist": t, **kws_cluster}
            adatas[x] = scflow.pp.cluster(
                adatas[x], plot=False, col_celltype=c_i, **kws_cl)
            sc.tl.rank_genes_groups(
                adatas[x], c_i, n_genes=None,
                key_added=f"rank_genes_groups_{c_i}",
                copy=False)  # find markers/DEGs
            _, adatas[x] = scflow.pp.annotate_by_marker_overlap(
                adatas[x], use_mks,
                col_celltype=c_i, col_celltype_new=cai, sep=sep,
                celltypes_superhierarchical=cts_superhierarchical,
                # top_n_markers=20,
                adj_pval_threshold=1e-15,
                method="overlap_coef", inplace=True)  # annotate
            if rename_marker_based_annotation is not None:
                adatas[x].obs.loc[:, cai] = adatas[x].obs[cai].replace(
                        rename_marker_based_annotation)  # re-name
            print(list(adatas[x].obs[cai].unique()))
            adatas[x].obs.loc[:, cci_scanvi] = adatas[x].obs[
                cai].apply(lambda x: unlabeled_cat if (sep in x) else x)
            valid_cts = all([q in adatas[x].obs[cai].unique()
                             for q in req_cts])
            valid_cts = valid_cts and (all([q not in adatas[x].obs[
                cai].unique() for q in prohib_cts]))
            valid_cts = False if any(adatas[x].obs[
                cci_scanvi] == unlabeled_cat) else valid_cts
            if valid_cts is True:
                adatas[x].obs.loc[:, "leiden_individual"] = adatas[x].obs[c_i]
                adatas[x].obs.loc[:, col_cluster_individual] = adatas[
                    x].obs[cai]
                adatas[x].obs.loc[:, "kws_cluster_individual"] = str(kws_cl)
                print(kws_cl)
            else:
                adatas[x].obs = adatas[x].obs.drop([i for i in [
                    cai, c_i, cci_scanvi] if i in adatas[x].obs], axis=1)

# Detect Samples with No Valid Clustering Scheme
no_valid = []
for x in files:
    if all((i in adatas[x].obs for i in [
            "leiden_individual", col_cluster_individual])) is False:
        print(f"No valid clustering found for {x}")
        no_valid += [x]
print(f"No Valid Clustering Scheme Yet: {no_valid}")

# # Individual Tweak
# samp = "RRRM2_BRN_GC_ISS-T_YNG_GY4"
# cols_tries = [i for i in adatas[samp].obs if "leiden_" in i]
# cts_try = cols_tries[14]
# _, adatas[samp] = scflow.pp.annotate_by_marker_overlap(
#     adatas[samp], mks_collapsed, col_celltype=cts_try,
#     col_celltype_new=col_cluster_individual + "_new", sep=sep,
#     adj_pval_threshold=1e-5,
#     method="overlap_coef", inplace=True)  # annotate by markers
# adatas[samp].obs.loc[:, "leiden_individual"] = adatas[samp].obs[cts_try]
# adatas[samp].obs.loc[:, col_cluster_individual] = adatas[samp].obs[
#     col_cluster_individual + "_new"]
# adatas[samp].obs["kws_cluster_individual"] = str({"resolution": float(
#     cts_try.split("res")[1].split("dist")[0]), "min_dist": float(
#         cts_try.split("res")[1].split("dist")[1])})

# Write Objects
for x in adatas:
    print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}\n\n{adatas[x]}\n")
    adatas[x].write_h5ad(files_individual[x])  # write individual file

# Print Parameters Used
for x in adatas:
    print(adatas[x].obs["kws_cluster_individual"])

# Plot
for x in adatas:
    sc.pl.pca_variance_ratio(adatas[x], log=True)
    sc.pl.umap(adatas[x], color=[
        "leiden_individual", col_cluster_individual], wspace=0.4)
del adatas

================================================================================
RR10_BRN_GC_WT_G5
================================================================================
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.2...
2025-09-01 16:43:07 | [INFO] init
... storing 'Group' as categorical
... storing 'sample' as categorical
... storing 'Characteristics[Organism]' as categorical
... storing 'Term Source REF' as categorical
... storing 'Term Accession Number' as categorical
... storing 'Characteristics[Strain]' as categorical
... storing 'Term Source REF.1' as categorical
... storing 'Term Accession Number.1' as categorical
... storing 'Characteristics[Animal Source]' as categorical
... storing 'Characteristics[Genotype]' as categorical
... storing 'Term Source REF.2' as categorical
... storing 'Term Accession Number.2' as categorical
... storing 'Characteristics[Sex]' as categorical
... storing 'Term Source REF.3' as categorical
... storing 'Term Accession Number.3' as categorical
... storing 'Characteristics[Material Type]' as categorical
... storing 'Term Source REF.4' as categorical
... storing 'Term Accession Number.4' as categorical
... storing 'Factor Value[Spaceflight]' as categorical
... storing 'Term Source REF.5' as categorical
... storing 'Term Accession Number.5' as categorical
... storing 'Characteristics[Age at Launch]' as categorical
... storing 'Unit' as categorical
... storing 'Term Source REF.6' as categorical
... storing 'Term Accession Number.6' as categorical
... storing 'Characteristics[Age at Euthanasia]' as categorical
... storing 'Unit.1' as categorical
... storing 'Term Source REF.7' as categorical
... storing 'Term Accession Number.7' as categorical
... storing 'Protocol REF' as categorical
... storing 'Parameter Value[habitat]' as categorical
... storing 'Unit.2' as categorical
... storing 'Term Source REF.8' as categorical
... storing 'Term Accession Number.8' as categorical
... storing 'Parameter Value[Enrichment material]' as categorical
... storing 'Parameter Value[light cycle]' as categorical
... storing 'Parameter Value[Diet]' as categorical
... storing 'Parameter Value[Feeding Schedule]' as categorical
... storing 'Parameter Value[Euthanasia Method]' as categorical
... storing 'Parameter Value[Carcass Preservation Method]' as categorical
... storing 'Unit.3' as categorical
... storing 'Term Source REF.9' as categorical
... storing 'Term Accession Number.9' as categorical
... storing 'Protocol REF.1' as categorical
... storing 'Parameter Value[Sample Preservation Method]' as categorical
... storing 'Term Source REF.10' as categorical
... storing 'Term Accession Number.10' as categorical
... storing 'Unit.4' as categorical
... storing 'Term Source REF.11' as categorical
... storing 'Term Accession Number.11' as categorical
... storing 'Comment[RFID]' as categorical
... storing 'Comment[Euthanasia Date]' as categorical
... storing 'Comment[Euthanasia Time (hh:mm:ss)]' as categorical
... storing 'Comment[BSP Dissection Date]' as categorical
... storing 'Comment[Source Description]' as categorical
... storing 'Comment[Partial Body Weight on BSP Dissection Date]' as categorical
... storing 'kws_pp_sample' as categorical
... storing 'feature_types' as categorical
... storing 'genome' as categorical
... storing 'interval' as categorical
['Excitatory', 'Excitatory-Inhibitory', 'Inhibitory', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Pericyte', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.75...
	***Performing Leiden clustering with resolution 0.2...
['Excitatory', 'Excitatory-Inhibitory', 'Inhibitory', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Pericyte', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.5...
	***Performing Leiden clustering with resolution 0.2...
['Excitatory', 'Excitatory-Inhibitory', 'Inhibitory', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Pericyte', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1...
	***Performing Leiden clustering with resolution 0.2...
['Excitatory', 'Excitatory-Inhibitory', 'Inhibitory', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Pericyte', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.3...
	***Performing Leiden clustering with resolution 0.2...
['Excitatory', 'Excitatory-Inhibitory', 'Inhibitory', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Pericyte', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.4...
	***Performing Leiden clustering with resolution 0.2...
['Excitatory', 'Excitatory-Inhibitory', 'Inhibitory', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Pericyte', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.5...
['Excitatory', 'Inhibitory', 'Excitatory-Inhibitory', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Pericyte', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.75...
	***Performing Leiden clustering with resolution 0.5...
['Excitatory', 'Inhibitory', 'Excitatory-Inhibitory', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Pericyte', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.5...
	***Performing Leiden clustering with resolution 0.5...
['Excitatory', 'Inhibitory', 'Excitatory-Inhibitory', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Pericyte', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1...
	***Performing Leiden clustering with resolution 0.5...
['Excitatory', 'Inhibitory', 'Excitatory-Inhibitory', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Pericyte', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.3...
	***Performing Leiden clustering with resolution 0.5...
['Excitatory', 'Inhibitory', 'Excitatory-Inhibitory', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Pericyte', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.4...
	***Performing Leiden clustering with resolution 0.5...
['Excitatory', 'Inhibitory', 'Excitatory-Inhibitory', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Pericyte', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.1...
['Excitatory', 'Inhibitory', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Microglial']
{'resolution': 0.1, 'min_dist': 1.5, 'n_comps': 50}


================================================================================
RR10_BRN_FLT_WT_F1
================================================================================
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.2...
... storing 'Group' as categorical
... storing 'sample' as categorical
... storing 'Characteristics[Organism]' as categorical
... storing 'Term Source REF' as categorical
... storing 'Term Accession Number' as categorical
... storing 'Characteristics[Strain]' as categorical
... storing 'Term Source REF.1' as categorical
... storing 'Term Accession Number.1' as categorical
... storing 'Characteristics[Animal Source]' as categorical
... storing 'Characteristics[Genotype]' as categorical
... storing 'Term Source REF.2' as categorical
... storing 'Term Accession Number.2' as categorical
... storing 'Characteristics[Sex]' as categorical
... storing 'Term Source REF.3' as categorical
... storing 'Term Accession Number.3' as categorical
... storing 'Characteristics[Material Type]' as categorical
... storing 'Term Source REF.4' as categorical
... storing 'Term Accession Number.4' as categorical
... storing 'Factor Value[Spaceflight]' as categorical
... storing 'Term Source REF.5' as categorical
... storing 'Term Accession Number.5' as categorical
... storing 'Characteristics[Age at Launch]' as categorical
... storing 'Unit' as categorical
... storing 'Term Source REF.6' as categorical
... storing 'Term Accession Number.6' as categorical
... storing 'Characteristics[Age at Euthanasia]' as categorical
... storing 'Unit.1' as categorical
... storing 'Term Source REF.7' as categorical
... storing 'Term Accession Number.7' as categorical
... storing 'Protocol REF' as categorical
... storing 'Parameter Value[habitat]' as categorical
... storing 'Unit.2' as categorical
... storing 'Term Source REF.8' as categorical
... storing 'Term Accession Number.8' as categorical
... storing 'Parameter Value[Enrichment material]' as categorical
... storing 'Parameter Value[light cycle]' as categorical
... storing 'Parameter Value[Diet]' as categorical
... storing 'Parameter Value[Feeding Schedule]' as categorical
... storing 'Parameter Value[Euthanasia Method]' as categorical
... storing 'Parameter Value[Carcass Preservation Method]' as categorical
... storing 'Unit.3' as categorical
... storing 'Term Source REF.9' as categorical
... storing 'Term Accession Number.9' as categorical
... storing 'Protocol REF.1' as categorical
... storing 'Parameter Value[Sample Preservation Method]' as categorical
... storing 'Term Source REF.10' as categorical
... storing 'Term Accession Number.10' as categorical
... storing 'Unit.4' as categorical
... storing 'Term Source REF.11' as categorical
... storing 'Term Accession Number.11' as categorical
... storing 'Comment[RFID]' as categorical
... storing 'Comment[Euthanasia Date]' as categorical
... storing 'Comment[Euthanasia Time (hh:mm:ss)]' as categorical
... storing 'Comment[BSP Dissection Date]' as categorical
... storing 'Comment[Source Description]' as categorical
... storing 'Comment[Partial Body Weight on BSP Dissection Date]' as categorical
... storing 'kws_pp_sample' as categorical
... storing 'feature_types' as categorical
... storing 'genome' as categorical
... storing 'interval' as categorical
['Oligodendrocyte', 'Excitatory-Inhibitory', 'Inhibitory', 'Excitatory', 'Astrocyte', 'Microglial', 'OPC', 'Endothelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.75...
	***Performing Leiden clustering with resolution 0.2...
['Oligodendrocyte', 'Excitatory-Inhibitory', 'Inhibitory', 'Excitatory', 'Astrocyte', 'Microglial', 'OPC', 'Endothelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.5...
	***Performing Leiden clustering with resolution 0.2...
['Oligodendrocyte', 'Excitatory-Inhibitory', 'Inhibitory', 'Excitatory', 'Astrocyte', 'Microglial', 'OPC', 'Endothelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1...
	***Performing Leiden clustering with resolution 0.2...
['Oligodendrocyte', 'Excitatory-Inhibitory', 'Inhibitory', 'Excitatory', 'Astrocyte', 'Microglial', 'OPC', 'Endothelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.3...
	***Performing Leiden clustering with resolution 0.2...
['Oligodendrocyte', 'Excitatory-Inhibitory', 'Inhibitory', 'Excitatory', 'Astrocyte', 'Microglial', 'OPC', 'Endothelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.4...
	***Performing Leiden clustering with resolution 0.2...
['Oligodendrocyte', 'Excitatory-Inhibitory', 'Inhibitory', 'Excitatory', 'Astrocyte', 'Microglial', 'OPC', 'Endothelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.5...
['Microglial', 'Inhibitory', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'OPC', 'Excitatory-Inhibitory', 'Endothelial', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.75...
	***Performing Leiden clustering with resolution 0.5...
['Microglial', 'Inhibitory', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'OPC', 'Excitatory-Inhibitory', 'Endothelial', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.5...
	***Performing Leiden clustering with resolution 0.5...
['Microglial', 'Inhibitory', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'OPC', 'Excitatory-Inhibitory', 'Endothelial', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1...
	***Performing Leiden clustering with resolution 0.5...
['Oligodendrocyte', 'Inhibitory', 'Excitatory', 'Astrocyte', 'Microglial', 'OPC', 'Excitatory-Inhibitory', 'Endothelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.3...
	***Performing Leiden clustering with resolution 0.5...
['Oligodendrocyte', 'Inhibitory', 'Excitatory', 'Astrocyte', 'Microglial', 'OPC', 'Endothelial']
{'resolution': 0.5, 'min_dist': 0.3, 'n_comps': 50}


================================================================================
RR10_BRN_GC_WT_G1
================================================================================
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
... storing 'Group' as categorical
... storing 'sample' as categorical
... storing 'Characteristics[Organism]' as categorical
	***Performing Leiden clustering with resolution 0.2...
... storing 'Term Source REF' as categorical
... storing 'Term Accession Number' as categorical
... storing 'Characteristics[Strain]' as categorical
... storing 'Term Source REF.1' as categorical
... storing 'Term Accession Number.1' as categorical
... storing 'Characteristics[Animal Source]' as categorical
... storing 'Characteristics[Genotype]' as categorical
... storing 'Term Source REF.2' as categorical
... storing 'Term Accession Number.2' as categorical
... storing 'Characteristics[Sex]' as categorical
... storing 'Term Source REF.3' as categorical
... storing 'Term Accession Number.3' as categorical
... storing 'Characteristics[Material Type]' as categorical
... storing 'Term Source REF.4' as categorical
... storing 'Term Accession Number.4' as categorical
... storing 'Factor Value[Spaceflight]' as categorical
... storing 'Term Source REF.5' as categorical
... storing 'Term Accession Number.5' as categorical
... storing 'Characteristics[Age at Launch]' as categorical
... storing 'Unit' as categorical
... storing 'Term Source REF.6' as categorical
... storing 'Term Accession Number.6' as categorical
... storing 'Characteristics[Age at Euthanasia]' as categorical
... storing 'Unit.1' as categorical
... storing 'Term Source REF.7' as categorical
... storing 'Term Accession Number.7' as categorical
... storing 'Protocol REF' as categorical
... storing 'Parameter Value[habitat]' as categorical
... storing 'Unit.2' as categorical
... storing 'Term Source REF.8' as categorical
... storing 'Term Accession Number.8' as categorical
... storing 'Parameter Value[Enrichment material]' as categorical
... storing 'Parameter Value[light cycle]' as categorical
... storing 'Parameter Value[Diet]' as categorical
... storing 'Parameter Value[Feeding Schedule]' as categorical
... storing 'Parameter Value[Euthanasia Method]' as categorical
... storing 'Parameter Value[Carcass Preservation Method]' as categorical
... storing 'Unit.3' as categorical
... storing 'Term Source REF.9' as categorical
... storing 'Term Accession Number.9' as categorical
... storing 'Protocol REF.1' as categorical
... storing 'Parameter Value[Sample Preservation Method]' as categorical
... storing 'Term Source REF.10' as categorical
... storing 'Term Accession Number.10' as categorical
... storing 'Unit.4' as categorical
... storing 'Term Source REF.11' as categorical
... storing 'Term Accession Number.11' as categorical
... storing 'Comment[RFID]' as categorical
... storing 'Comment[Euthanasia Date]' as categorical
... storing 'Comment[Euthanasia Time (hh:mm:ss)]' as categorical
... storing 'Comment[BSP Dissection Date]' as categorical
... storing 'Comment[Source Description]' as categorical
... storing 'Comment[Partial Body Weight on BSP Dissection Date]' as categorical
... storing 'kws_pp_sample' as categorical
... storing 'feature_types' as categorical
... storing 'genome' as categorical
... storing 'interval' as categorical
['Excitatory', 'Inhibitory', 'Microglial', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Endothelial']
{'resolution': 0.2, 'min_dist': 1.5, 'n_comps': 50}


================================================================================
RR10_BRN_GC_WT_G9
================================================================================
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.2...
... storing 'Group' as categorical
... storing 'sample' as categorical
... storing 'Characteristics[Organism]' as categorical
... storing 'Term Source REF' as categorical
... storing 'Term Accession Number' as categorical
... storing 'Characteristics[Strain]' as categorical
... storing 'Term Source REF.1' as categorical
... storing 'Term Accession Number.1' as categorical
... storing 'Characteristics[Animal Source]' as categorical
... storing 'Characteristics[Genotype]' as categorical
... storing 'Term Source REF.2' as categorical
... storing 'Term Accession Number.2' as categorical
... storing 'Characteristics[Sex]' as categorical
... storing 'Term Source REF.3' as categorical
... storing 'Term Accession Number.3' as categorical
... storing 'Characteristics[Material Type]' as categorical
... storing 'Term Source REF.4' as categorical
... storing 'Term Accession Number.4' as categorical
... storing 'Factor Value[Spaceflight]' as categorical
... storing 'Term Source REF.5' as categorical
... storing 'Term Accession Number.5' as categorical
... storing 'Characteristics[Age at Launch]' as categorical
... storing 'Unit' as categorical
... storing 'Term Source REF.6' as categorical
... storing 'Term Accession Number.6' as categorical
... storing 'Characteristics[Age at Euthanasia]' as categorical
... storing 'Unit.1' as categorical
... storing 'Term Source REF.7' as categorical
... storing 'Term Accession Number.7' as categorical
... storing 'Protocol REF' as categorical
... storing 'Parameter Value[habitat]' as categorical
... storing 'Unit.2' as categorical
... storing 'Term Source REF.8' as categorical
... storing 'Term Accession Number.8' as categorical
... storing 'Parameter Value[Enrichment material]' as categorical
... storing 'Parameter Value[light cycle]' as categorical
... storing 'Parameter Value[Diet]' as categorical
... storing 'Parameter Value[Feeding Schedule]' as categorical
... storing 'Parameter Value[Euthanasia Method]' as categorical
... storing 'Parameter Value[Carcass Preservation Method]' as categorical
... storing 'Unit.3' as categorical
... storing 'Term Source REF.9' as categorical
... storing 'Term Accession Number.9' as categorical
... storing 'Protocol REF.1' as categorical
... storing 'Parameter Value[Sample Preservation Method]' as categorical
... storing 'Term Source REF.10' as categorical
... storing 'Term Accession Number.10' as categorical
... storing 'Unit.4' as categorical
... storing 'Term Source REF.11' as categorical
... storing 'Term Accession Number.11' as categorical
... storing 'Comment[RFID]' as categorical
... storing 'Comment[Euthanasia Date]' as categorical
... storing 'Comment[Euthanasia Time (hh:mm:ss)]' as categorical
... storing 'Comment[BSP Dissection Date]' as categorical
... storing 'Comment[Source Description]' as categorical
... storing 'Comment[Partial Body Weight on BSP Dissection Date]' as categorical
... storing 'kws_pp_sample' as categorical
... storing 'feature_types' as categorical
... storing 'genome' as categorical
... storing 'interval' as categorical
['Microglial', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'Inhibitory', 'OPC', 'Neuroepithelial']
{'resolution': 0.2, 'min_dist': 1.5, 'n_comps': 50}


================================================================================
RR10_BRN_FLT_WT_F3
================================================================================
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.2...
... storing 'Group' as categorical
... storing 'sample' as categorical
... storing 'Characteristics[Organism]' as categorical
... storing 'Term Source REF' as categorical
... storing 'Term Accession Number' as categorical
... storing 'Characteristics[Strain]' as categorical
... storing 'Term Source REF.1' as categorical
... storing 'Term Accession Number.1' as categorical
... storing 'Characteristics[Animal Source]' as categorical
... storing 'Characteristics[Genotype]' as categorical
... storing 'Term Source REF.2' as categorical
... storing 'Term Accession Number.2' as categorical
... storing 'Characteristics[Sex]' as categorical
... storing 'Term Source REF.3' as categorical
... storing 'Term Accession Number.3' as categorical
... storing 'Characteristics[Material Type]' as categorical
... storing 'Term Source REF.4' as categorical
... storing 'Term Accession Number.4' as categorical
... storing 'Factor Value[Spaceflight]' as categorical
... storing 'Term Source REF.5' as categorical
... storing 'Term Accession Number.5' as categorical
... storing 'Characteristics[Age at Launch]' as categorical
... storing 'Unit' as categorical
... storing 'Term Source REF.6' as categorical
... storing 'Term Accession Number.6' as categorical
... storing 'Characteristics[Age at Euthanasia]' as categorical
... storing 'Unit.1' as categorical
... storing 'Term Source REF.7' as categorical
... storing 'Term Accession Number.7' as categorical
... storing 'Protocol REF' as categorical
... storing 'Parameter Value[habitat]' as categorical
... storing 'Unit.2' as categorical
... storing 'Term Source REF.8' as categorical
... storing 'Term Accession Number.8' as categorical
... storing 'Parameter Value[Enrichment material]' as categorical
... storing 'Parameter Value[light cycle]' as categorical
... storing 'Parameter Value[Diet]' as categorical
... storing 'Parameter Value[Feeding Schedule]' as categorical
... storing 'Parameter Value[Euthanasia Method]' as categorical
... storing 'Parameter Value[Carcass Preservation Method]' as categorical
... storing 'Unit.3' as categorical
... storing 'Term Source REF.9' as categorical
... storing 'Term Accession Number.9' as categorical
... storing 'Protocol REF.1' as categorical
... storing 'Parameter Value[Sample Preservation Method]' as categorical
... storing 'Term Source REF.10' as categorical
... storing 'Term Accession Number.10' as categorical
... storing 'Unit.4' as categorical
... storing 'Term Source REF.11' as categorical
... storing 'Term Accession Number.11' as categorical
... storing 'Comment[RFID]' as categorical
... storing 'Comment[Euthanasia Date]' as categorical
... storing 'Comment[Euthanasia Time (hh:mm:ss)]' as categorical
... storing 'Comment[BSP Dissection Date]' as categorical
... storing 'Comment[Source Description]' as categorical
... storing 'Comment[Partial Body Weight on BSP Dissection Date]' as categorical
... storing 'kws_pp_sample' as categorical
... storing 'feature_types' as categorical
... storing 'genome' as categorical
... storing 'interval' as categorical
['Excitatory', 'Oligodendrocyte', 'Inhibitory', 'OPC', 'Astrocyte', 'Neuroepithelial', 'Microglial']
{'resolution': 0.2, 'min_dist': 1.5, 'n_comps': 50}


================================================================================
RR10_BRN_GC_WT_G3
================================================================================
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.2...
... storing 'Group' as categorical
... storing 'sample' as categorical
... storing 'Characteristics[Organism]' as categorical
... storing 'Term Source REF' as categorical
... storing 'Term Accession Number' as categorical
... storing 'Characteristics[Strain]' as categorical
... storing 'Term Source REF.1' as categorical
... storing 'Term Accession Number.1' as categorical
... storing 'Characteristics[Animal Source]' as categorical
... storing 'Characteristics[Genotype]' as categorical
... storing 'Term Source REF.2' as categorical
... storing 'Term Accession Number.2' as categorical
... storing 'Characteristics[Sex]' as categorical
... storing 'Term Source REF.3' as categorical
... storing 'Term Accession Number.3' as categorical
... storing 'Characteristics[Material Type]' as categorical
... storing 'Term Source REF.4' as categorical
... storing 'Term Accession Number.4' as categorical
... storing 'Factor Value[Spaceflight]' as categorical
... storing 'Term Source REF.5' as categorical
... storing 'Term Accession Number.5' as categorical
... storing 'Characteristics[Age at Launch]' as categorical
... storing 'Unit' as categorical
... storing 'Term Source REF.6' as categorical
... storing 'Term Accession Number.6' as categorical
... storing 'Characteristics[Age at Euthanasia]' as categorical
... storing 'Unit.1' as categorical
... storing 'Term Source REF.7' as categorical
... storing 'Term Accession Number.7' as categorical
... storing 'Protocol REF' as categorical
... storing 'Parameter Value[habitat]' as categorical
... storing 'Unit.2' as categorical
... storing 'Term Source REF.8' as categorical
... storing 'Term Accession Number.8' as categorical
... storing 'Parameter Value[Enrichment material]' as categorical
... storing 'Parameter Value[light cycle]' as categorical
... storing 'Parameter Value[Diet]' as categorical
... storing 'Parameter Value[Feeding Schedule]' as categorical
... storing 'Parameter Value[Euthanasia Method]' as categorical
... storing 'Parameter Value[Carcass Preservation Method]' as categorical
... storing 'Unit.3' as categorical
... storing 'Term Source REF.9' as categorical
... storing 'Term Accession Number.9' as categorical
... storing 'Protocol REF.1' as categorical
... storing 'Parameter Value[Sample Preservation Method]' as categorical
... storing 'Term Source REF.10' as categorical
... storing 'Term Accession Number.10' as categorical
... storing 'Unit.4' as categorical
... storing 'Term Source REF.11' as categorical
... storing 'Term Accession Number.11' as categorical
... storing 'Comment[RFID]' as categorical
... storing 'Comment[Euthanasia Date]' as categorical
... storing 'Comment[Euthanasia Time (hh:mm:ss)]' as categorical
... storing 'Comment[BSP Dissection Date]' as categorical
... storing 'Comment[Source Description]' as categorical
... storing 'Comment[Partial Body Weight on BSP Dissection Date]' as categorical
... storing 'kws_pp_sample' as categorical
... storing 'feature_types' as categorical
... storing 'genome' as categorical
... storing 'interval' as categorical
['Inhibitory', 'OPC', 'Astrocyte', 'Neuroepithelial', 'Excitatory', 'Microglial', 'Oligodendrocyte']
{'resolution': 0.2, 'min_dist': 1.5, 'n_comps': 50}


================================================================================
RR10_BRN_FLT_WT_F7
================================================================================
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.2...
... storing 'Group' as categorical
... storing 'sample' as categorical
... storing 'Characteristics[Organism]' as categorical
... storing 'Term Source REF' as categorical
... storing 'Term Accession Number' as categorical
... storing 'Characteristics[Strain]' as categorical
... storing 'Term Source REF.1' as categorical
... storing 'Term Accession Number.1' as categorical
... storing 'Characteristics[Animal Source]' as categorical
... storing 'Characteristics[Genotype]' as categorical
... storing 'Term Source REF.2' as categorical
... storing 'Term Accession Number.2' as categorical
... storing 'Characteristics[Sex]' as categorical
... storing 'Term Source REF.3' as categorical
... storing 'Term Accession Number.3' as categorical
... storing 'Characteristics[Material Type]' as categorical
... storing 'Term Source REF.4' as categorical
... storing 'Term Accession Number.4' as categorical
... storing 'Factor Value[Spaceflight]' as categorical
... storing 'Term Source REF.5' as categorical
... storing 'Term Accession Number.5' as categorical
... storing 'Characteristics[Age at Launch]' as categorical
... storing 'Unit' as categorical
... storing 'Term Source REF.6' as categorical
... storing 'Term Accession Number.6' as categorical
... storing 'Characteristics[Age at Euthanasia]' as categorical
... storing 'Unit.1' as categorical
... storing 'Term Source REF.7' as categorical
... storing 'Term Accession Number.7' as categorical
... storing 'Protocol REF' as categorical
... storing 'Parameter Value[habitat]' as categorical
... storing 'Unit.2' as categorical
... storing 'Term Source REF.8' as categorical
... storing 'Term Accession Number.8' as categorical
... storing 'Parameter Value[Enrichment material]' as categorical
... storing 'Parameter Value[light cycle]' as categorical
... storing 'Parameter Value[Diet]' as categorical
... storing 'Parameter Value[Feeding Schedule]' as categorical
... storing 'Parameter Value[Euthanasia Method]' as categorical
... storing 'Parameter Value[Carcass Preservation Method]' as categorical
... storing 'Unit.3' as categorical
... storing 'Term Source REF.9' as categorical
... storing 'Term Accession Number.9' as categorical
... storing 'Protocol REF.1' as categorical
... storing 'Parameter Value[Sample Preservation Method]' as categorical
... storing 'Term Source REF.10' as categorical
... storing 'Term Accession Number.10' as categorical
... storing 'Unit.4' as categorical
... storing 'Term Source REF.11' as categorical
... storing 'Term Accession Number.11' as categorical
... storing 'Comment[RFID]' as categorical
... storing 'Comment[Euthanasia Date]' as categorical
... storing 'Comment[Euthanasia Time (hh:mm:ss)]' as categorical
... storing 'Comment[BSP Dissection Date]' as categorical
... storing 'Comment[Source Description]' as categorical
... storing 'Comment[Partial Body Weight on BSP Dissection Date]' as categorical
... storing 'kws_pp_sample' as categorical
... storing 'feature_types' as categorical
... storing 'genome' as categorical
... storing 'interval' as categorical
['Excitatory', 'Inhibitory', 'Oligodendrocyte', 'Pericyte', 'Microglial', 'OPC', 'Astrocyte', 'Endothelial']
{'resolution': 0.2, 'min_dist': 1.5, 'n_comps': 50}


================================================================================
RR10_BRN_FLT_WT_F9
================================================================================
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.2...
... storing 'Group' as categorical
... storing 'sample' as categorical
... storing 'Characteristics[Organism]' as categorical
... storing 'Term Source REF' as categorical
... storing 'Term Accession Number' as categorical
... storing 'Characteristics[Strain]' as categorical
... storing 'Term Source REF.1' as categorical
... storing 'Term Accession Number.1' as categorical
... storing 'Characteristics[Animal Source]' as categorical
... storing 'Characteristics[Genotype]' as categorical
... storing 'Term Source REF.2' as categorical
... storing 'Term Accession Number.2' as categorical
... storing 'Characteristics[Sex]' as categorical
... storing 'Term Source REF.3' as categorical
... storing 'Term Accession Number.3' as categorical
... storing 'Characteristics[Material Type]' as categorical
... storing 'Term Source REF.4' as categorical
... storing 'Term Accession Number.4' as categorical
... storing 'Factor Value[Spaceflight]' as categorical
... storing 'Term Source REF.5' as categorical
... storing 'Term Accession Number.5' as categorical
... storing 'Characteristics[Age at Launch]' as categorical
... storing 'Unit' as categorical
... storing 'Term Source REF.6' as categorical
... storing 'Term Accession Number.6' as categorical
... storing 'Characteristics[Age at Euthanasia]' as categorical
... storing 'Unit.1' as categorical
... storing 'Term Source REF.7' as categorical
... storing 'Term Accession Number.7' as categorical
... storing 'Protocol REF' as categorical
... storing 'Parameter Value[habitat]' as categorical
... storing 'Unit.2' as categorical
... storing 'Term Source REF.8' as categorical
... storing 'Term Accession Number.8' as categorical
... storing 'Parameter Value[Enrichment material]' as categorical
... storing 'Parameter Value[light cycle]' as categorical
... storing 'Parameter Value[Diet]' as categorical
... storing 'Parameter Value[Feeding Schedule]' as categorical
... storing 'Parameter Value[Euthanasia Method]' as categorical
... storing 'Parameter Value[Carcass Preservation Method]' as categorical
... storing 'Unit.3' as categorical
... storing 'Term Source REF.9' as categorical
... storing 'Term Accession Number.9' as categorical
... storing 'Protocol REF.1' as categorical
... storing 'Parameter Value[Sample Preservation Method]' as categorical
... storing 'Term Source REF.10' as categorical
... storing 'Term Accession Number.10' as categorical
... storing 'Unit.4' as categorical
... storing 'Term Source REF.11' as categorical
... storing 'Term Accession Number.11' as categorical
... storing 'Comment[RFID]' as categorical
... storing 'Comment[Euthanasia Date]' as categorical
... storing 'Comment[Euthanasia Time (hh:mm:ss)]' as categorical
... storing 'Comment[BSP Dissection Date]' as categorical
... storing 'Comment[Source Description]' as categorical
... storing 'Comment[Partial Body Weight on BSP Dissection Date]' as categorical
... storing 'kws_pp_sample' as categorical
... storing 'feature_types' as categorical
... storing 'genome' as categorical
... storing 'interval' as categorical
['Oligodendrocyte', 'Astrocyte', 'Excitatory', 'OPC', 'Inhibitory', 'Pericyte', 'Microglial']
{'resolution': 0.2, 'min_dist': 1.5, 'n_comps': 50}


================================================================================
RR10_BRN_FLT_WT_F5
================================================================================
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.2...
... storing 'Group' as categorical
... storing 'sample' as categorical
... storing 'Characteristics[Organism]' as categorical
... storing 'Term Source REF' as categorical
... storing 'Term Accession Number' as categorical
... storing 'Characteristics[Strain]' as categorical
... storing 'Term Source REF.1' as categorical
... storing 'Term Accession Number.1' as categorical
... storing 'Characteristics[Animal Source]' as categorical
... storing 'Characteristics[Genotype]' as categorical
... storing 'Term Source REF.2' as categorical
... storing 'Term Accession Number.2' as categorical
... storing 'Characteristics[Sex]' as categorical
... storing 'Term Source REF.3' as categorical
... storing 'Term Accession Number.3' as categorical
... storing 'Characteristics[Material Type]' as categorical
... storing 'Term Source REF.4' as categorical
... storing 'Term Accession Number.4' as categorical
... storing 'Factor Value[Spaceflight]' as categorical
... storing 'Term Source REF.5' as categorical
... storing 'Term Accession Number.5' as categorical
... storing 'Characteristics[Age at Launch]' as categorical
... storing 'Unit' as categorical
... storing 'Term Source REF.6' as categorical
... storing 'Term Accession Number.6' as categorical
... storing 'Characteristics[Age at Euthanasia]' as categorical
... storing 'Unit.1' as categorical
... storing 'Term Source REF.7' as categorical
... storing 'Term Accession Number.7' as categorical
... storing 'Protocol REF' as categorical
... storing 'Parameter Value[habitat]' as categorical
... storing 'Unit.2' as categorical
... storing 'Term Source REF.8' as categorical
... storing 'Term Accession Number.8' as categorical
... storing 'Parameter Value[Enrichment material]' as categorical
... storing 'Parameter Value[light cycle]' as categorical
... storing 'Parameter Value[Diet]' as categorical
... storing 'Parameter Value[Feeding Schedule]' as categorical
... storing 'Parameter Value[Euthanasia Method]' as categorical
... storing 'Parameter Value[Carcass Preservation Method]' as categorical
... storing 'Unit.3' as categorical
... storing 'Term Source REF.9' as categorical
... storing 'Term Accession Number.9' as categorical
... storing 'Protocol REF.1' as categorical
... storing 'Parameter Value[Sample Preservation Method]' as categorical
... storing 'Term Source REF.10' as categorical
... storing 'Term Accession Number.10' as categorical
... storing 'Unit.4' as categorical
... storing 'Term Source REF.11' as categorical
... storing 'Term Accession Number.11' as categorical
... storing 'Comment[RFID]' as categorical
... storing 'Comment[Euthanasia Date]' as categorical
... storing 'Comment[Euthanasia Time (hh:mm:ss)]' as categorical
... storing 'Comment[BSP Dissection Date]' as categorical
... storing 'Comment[Source Description]' as categorical
... storing 'Comment[Partial Body Weight on BSP Dissection Date]' as categorical
... storing 'kws_pp_sample' as categorical
... storing 'feature_types' as categorical
... storing 'genome' as categorical
... storing 'interval' as categorical
['Oligodendrocyte', 'Excitatory', 'Inhibitory', 'Astrocyte', 'OPC', 'Endothelial', 'Microglial', 'Pericyte']
{'resolution': 0.2, 'min_dist': 1.5, 'n_comps': 50}


================================================================================
RR10_BRN_GC_WT_G7
================================================================================
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.2...
... storing 'Group' as categorical
... storing 'sample' as categorical
... storing 'Characteristics[Organism]' as categorical
... storing 'Term Source REF' as categorical
... storing 'Term Accession Number' as categorical
... storing 'Characteristics[Strain]' as categorical
... storing 'Term Source REF.1' as categorical
... storing 'Term Accession Number.1' as categorical
... storing 'Characteristics[Animal Source]' as categorical
... storing 'Characteristics[Genotype]' as categorical
... storing 'Term Source REF.2' as categorical
... storing 'Term Accession Number.2' as categorical
... storing 'Characteristics[Sex]' as categorical
... storing 'Term Source REF.3' as categorical
... storing 'Term Accession Number.3' as categorical
... storing 'Characteristics[Material Type]' as categorical
... storing 'Term Source REF.4' as categorical
... storing 'Term Accession Number.4' as categorical
... storing 'Factor Value[Spaceflight]' as categorical
... storing 'Term Source REF.5' as categorical
... storing 'Term Accession Number.5' as categorical
... storing 'Characteristics[Age at Launch]' as categorical
... storing 'Unit' as categorical
... storing 'Term Source REF.6' as categorical
... storing 'Term Accession Number.6' as categorical
... storing 'Characteristics[Age at Euthanasia]' as categorical
... storing 'Unit.1' as categorical
... storing 'Term Source REF.7' as categorical
... storing 'Term Accession Number.7' as categorical
... storing 'Protocol REF' as categorical
... storing 'Parameter Value[habitat]' as categorical
... storing 'Unit.2' as categorical
... storing 'Term Source REF.8' as categorical
... storing 'Term Accession Number.8' as categorical
... storing 'Parameter Value[Enrichment material]' as categorical
... storing 'Parameter Value[light cycle]' as categorical
... storing 'Parameter Value[Diet]' as categorical
... storing 'Parameter Value[Feeding Schedule]' as categorical
... storing 'Parameter Value[Euthanasia Method]' as categorical
... storing 'Parameter Value[Carcass Preservation Method]' as categorical
... storing 'Unit.3' as categorical
... storing 'Term Source REF.9' as categorical
... storing 'Term Accession Number.9' as categorical
... storing 'Protocol REF.1' as categorical
... storing 'Parameter Value[Sample Preservation Method]' as categorical
... storing 'Term Source REF.10' as categorical
... storing 'Term Accession Number.10' as categorical
... storing 'Unit.4' as categorical
... storing 'Term Source REF.11' as categorical
... storing 'Term Accession Number.11' as categorical
... storing 'Comment[RFID]' as categorical
... storing 'Comment[Euthanasia Date]' as categorical
... storing 'Comment[Euthanasia Time (hh:mm:ss)]' as categorical
... storing 'Comment[BSP Dissection Date]' as categorical
... storing 'Comment[Source Description]' as categorical
... storing 'Comment[Partial Body Weight on BSP Dissection Date]' as categorical
... storing 'kws_pp_sample' as categorical
... storing 'feature_types' as categorical
... storing 'genome' as categorical
... storing 'interval' as categorical
['Inhibitory', 'OPC', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.75...
	***Performing Leiden clustering with resolution 0.2...
['Inhibitory', 'OPC', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.5...
	***Performing Leiden clustering with resolution 0.2...
['Inhibitory', 'OPC', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1...
	***Performing Leiden clustering with resolution 0.2...
['Inhibitory', 'OPC', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.3...
	***Performing Leiden clustering with resolution 0.2...
['Inhibitory', 'OPC', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.4...
	***Performing Leiden clustering with resolution 0.2...
['Inhibitory', 'OPC', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.5...
['Excitatory-Inhibitory', 'OPC', 'Excitatory', 'Inhibitory', 'Astrocyte', 'Oligodendrocyte', 'Pericyte', 'Neuroepithelial', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.75...
	***Performing Leiden clustering with resolution 0.5...
['Excitatory-Inhibitory', 'OPC', 'Excitatory', 'Inhibitory', 'Astrocyte', 'Oligodendrocyte', 'Pericyte', 'Neuroepithelial', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.5...
	***Performing Leiden clustering with resolution 0.5...
['Excitatory-Inhibitory', 'OPC', 'Excitatory', 'Inhibitory', 'Astrocyte', 'Oligodendrocyte', 'Pericyte', 'Neuroepithelial', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1...
	***Performing Leiden clustering with resolution 0.5...
['Excitatory-Inhibitory', 'OPC', 'Excitatory', 'Inhibitory', 'Astrocyte', 'Oligodendrocyte', 'Pericyte', 'Neuroepithelial', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.3...
	***Performing Leiden clustering with resolution 0.5...
['Excitatory-Inhibitory', 'OPC', 'Excitatory', 'Inhibitory', 'Astrocyte', 'Oligodendrocyte', 'Pericyte', 'Neuroepithelial', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.4...
	***Performing Leiden clustering with resolution 0.5...
['Excitatory-Inhibitory', 'OPC', 'Excitatory', 'Inhibitory', 'Astrocyte', 'Oligodendrocyte', 'Pericyte', 'Neuroepithelial', 'Microglial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.1...
['Inhibitory', 'Oligodendrocyte', 'Excitatory', 'Astrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.75...
	***Performing Leiden clustering with resolution 0.1...
['Inhibitory', 'Oligodendrocyte', 'Excitatory', 'Astrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.5...
	***Performing Leiden clustering with resolution 0.1...
['Inhibitory', 'Oligodendrocyte', 'Excitatory', 'Astrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1...
	***Performing Leiden clustering with resolution 0.1...
['Inhibitory', 'Oligodendrocyte', 'Excitatory', 'Astrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.3...
	***Performing Leiden clustering with resolution 0.1...
['Inhibitory', 'Oligodendrocyte', 'Excitatory', 'Astrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.4...
	***Performing Leiden clustering with resolution 0.1...
['Inhibitory', 'Oligodendrocyte', 'Excitatory', 'Astrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.075...
['Inhibitory', 'Oligodendrocyte', 'Excitatory', 'Astrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.75...
	***Performing Leiden clustering with resolution 0.075...
['Inhibitory', 'Oligodendrocyte', 'Excitatory', 'Astrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.5...
	***Performing Leiden clustering with resolution 0.075...
['Inhibitory', 'Oligodendrocyte', 'Excitatory', 'Astrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1...
	***Performing Leiden clustering with resolution 0.075...
['Inhibitory', 'Oligodendrocyte', 'Excitatory', 'Astrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.3...
	***Performing Leiden clustering with resolution 0.075...
['Inhibitory', 'Oligodendrocyte', 'Excitatory', 'Astrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.4...
	***Performing Leiden clustering with resolution 0.075...
['Inhibitory', 'Oligodendrocyte', 'Excitatory', 'Astrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.15...
['Inhibitory', 'OPC', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.75...
	***Performing Leiden clustering with resolution 0.15...
['Inhibitory', 'OPC', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.5...
	***Performing Leiden clustering with resolution 0.15...
['Inhibitory', 'OPC', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1...
	***Performing Leiden clustering with resolution 0.15...
['Inhibitory', 'OPC', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.3...
	***Performing Leiden clustering with resolution 0.15...
['Inhibitory', 'OPC', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 0.4...
	***Performing Leiden clustering with resolution 0.15...
['Inhibitory', 'OPC', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'Neuroepithelial']
	***Calculating PCA with 50 components...
	***Building neighborhood...
	***Embedding UMAP with minimum distance 1.5...
	***Performing Leiden clustering with resolution 0.3...
... storing 'kws_cluster_individual' as categorical
['Inhibitory', 'OPC', 'Excitatory', 'Astrocyte', 'Oligodendrocyte', 'Pericyte', 'Neuroepithelial', 'Microglial']
{'resolution': 0.3, 'min_dist': 1.5, 'n_comps': 50}
No Valid Clustering Scheme Yet: []


================================================================================
RR10_BRN_GC_WT_G5
================================================================================

AnnData object with n_obs × n_vars = 3609 × 25298
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.1dist1.5', 'annotation_by_markers_individual_res0.1dist1.5', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1.5', 'rank_genes_groups_leiden_individual_res0.2dist1.5', 'leiden_individual_res0.2dist0.75', 'rank_genes_groups_leiden_individual_res0.2dist0.75', 'leiden_individual_res0.2dist0.5', 'rank_genes_groups_leiden_individual_res0.2dist0.5', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1', 'leiden_individual_res0.2dist0.3', 'rank_genes_groups_leiden_individual_res0.2dist0.3', 'leiden_individual_res0.2dist0.4', 'rank_genes_groups_leiden_individual_res0.2dist0.4', 'leiden_individual_res0.5dist1.5', 'rank_genes_groups_leiden_individual_res0.5dist1.5', 'leiden_individual_res0.5dist0.75', 'rank_genes_groups_leiden_individual_res0.5dist0.75', 'leiden_individual_res0.5dist0.5', 'rank_genes_groups_leiden_individual_res0.5dist0.5', 'leiden_individual_res0.5dist1', 'rank_genes_groups_leiden_individual_res0.5dist1', 'leiden_individual_res0.5dist0.3', 'rank_genes_groups_leiden_individual_res0.5dist0.3', 'leiden_individual_res0.5dist0.4', 'rank_genes_groups_leiden_individual_res0.5dist0.4', 'leiden_individual_res0.1dist1.5', 'rank_genes_groups_leiden_individual_res0.1dist1.5'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'log1p', 'scaled'
    obsp: 'distances', 'connectivities'

... storing 'kws_cluster_individual' as categorical

================================================================================
RR10_BRN_FLT_WT_F1
================================================================================

AnnData object with n_obs × n_vars = 9930 × 30180
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.5dist0.3', 'annotation_by_markers_individual_res0.5dist0.3', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1.5', 'rank_genes_groups_leiden_individual_res0.2dist1.5', 'leiden_individual_res0.2dist0.75', 'rank_genes_groups_leiden_individual_res0.2dist0.75', 'leiden_individual_res0.2dist0.5', 'rank_genes_groups_leiden_individual_res0.2dist0.5', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1', 'leiden_individual_res0.2dist0.3', 'rank_genes_groups_leiden_individual_res0.2dist0.3', 'leiden_individual_res0.2dist0.4', 'rank_genes_groups_leiden_individual_res0.2dist0.4', 'leiden_individual_res0.5dist1.5', 'rank_genes_groups_leiden_individual_res0.5dist1.5', 'leiden_individual_res0.5dist0.75', 'rank_genes_groups_leiden_individual_res0.5dist0.75', 'leiden_individual_res0.5dist0.5', 'rank_genes_groups_leiden_individual_res0.5dist0.5', 'leiden_individual_res0.5dist1', 'rank_genes_groups_leiden_individual_res0.5dist1', 'leiden_individual_res0.5dist0.3', 'rank_genes_groups_leiden_individual_res0.5dist0.3'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'log1p', 'scaled'
    obsp: 'distances', 'connectivities'

... storing 'kws_cluster_individual' as categorical

================================================================================
RR10_BRN_GC_WT_G1
================================================================================

AnnData object with n_obs × n_vars = 6826 × 28485
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1.5', 'annotation_by_markers_individual_res0.2dist1.5', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1.5', 'rank_genes_groups_leiden_individual_res0.2dist1.5'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'log1p', 'scaled'
    obsp: 'distances', 'connectivities'

... storing 'kws_cluster_individual' as categorical

================================================================================
RR10_BRN_GC_WT_G9
================================================================================

AnnData object with n_obs × n_vars = 6823 × 26600
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1.5', 'annotation_by_markers_individual_res0.2dist1.5', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1.5', 'rank_genes_groups_leiden_individual_res0.2dist1.5'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'log1p', 'scaled'
    obsp: 'distances', 'connectivities'

... storing 'kws_cluster_individual' as categorical

================================================================================
RR10_BRN_FLT_WT_F3
================================================================================

AnnData object with n_obs × n_vars = 9550 × 29881
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1.5', 'annotation_by_markers_individual_res0.2dist1.5', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1.5', 'rank_genes_groups_leiden_individual_res0.2dist1.5'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'log1p', 'scaled'
    obsp: 'distances', 'connectivities'

... storing 'kws_cluster_individual' as categorical

================================================================================
RR10_BRN_GC_WT_G3
================================================================================

AnnData object with n_obs × n_vars = 12739 × 30071
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1.5', 'annotation_by_markers_individual_res0.2dist1.5', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1.5', 'rank_genes_groups_leiden_individual_res0.2dist1.5'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'log1p', 'scaled'
    obsp: 'distances', 'connectivities'

... storing 'kws_cluster_individual' as categorical

================================================================================
RR10_BRN_FLT_WT_F7
================================================================================

AnnData object with n_obs × n_vars = 3994 × 25369
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1.5', 'annotation_by_markers_individual_res0.2dist1.5', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1.5', 'rank_genes_groups_leiden_individual_res0.2dist1.5'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'log1p', 'scaled'
    obsp: 'distances', 'connectivities'

... storing 'kws_cluster_individual' as categorical

================================================================================
RR10_BRN_FLT_WT_F9
================================================================================

AnnData object with n_obs × n_vars = 2924 × 24858
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1.5', 'annotation_by_markers_individual_res0.2dist1.5', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1.5', 'rank_genes_groups_leiden_individual_res0.2dist1.5'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'log1p', 'scaled'
    obsp: 'distances', 'connectivities'

... storing 'kws_cluster_individual' as categorical

================================================================================
RR10_BRN_FLT_WT_F5
================================================================================

AnnData object with n_obs × n_vars = 2833 × 24519
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1.5', 'annotation_by_markers_individual_res0.2dist1.5', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1.5', 'rank_genes_groups_leiden_individual_res0.2dist1.5'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'log1p', 'scaled'
    obsp: 'distances', 'connectivities'

... storing 'kws_cluster_individual' as categorical

================================================================================
RR10_BRN_GC_WT_G7
================================================================================

AnnData object with n_obs × n_vars = 3098 × 25568
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.3dist1.5', 'annotation_by_markers_individual_res0.3dist1.5', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
    var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1.5', 'rank_genes_groups_leiden_individual_res0.2dist1.5', 'leiden_individual_res0.2dist0.75', 'rank_genes_groups_leiden_individual_res0.2dist0.75', 'leiden_individual_res0.2dist0.5', 'rank_genes_groups_leiden_individual_res0.2dist0.5', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1', 'leiden_individual_res0.2dist0.3', 'rank_genes_groups_leiden_individual_res0.2dist0.3', 'leiden_individual_res0.2dist0.4', 'rank_genes_groups_leiden_individual_res0.2dist0.4', 'leiden_individual_res0.5dist1.5', 'rank_genes_groups_leiden_individual_res0.5dist1.5', 'leiden_individual_res0.5dist0.75', 'rank_genes_groups_leiden_individual_res0.5dist0.75', 'leiden_individual_res0.5dist0.5', 'rank_genes_groups_leiden_individual_res0.5dist0.5', 'leiden_individual_res0.5dist1', 'rank_genes_groups_leiden_individual_res0.5dist1', 'leiden_individual_res0.5dist0.3', 'rank_genes_groups_leiden_individual_res0.5dist0.3', 'leiden_individual_res0.5dist0.4', 'rank_genes_groups_leiden_individual_res0.5dist0.4', 'leiden_individual_res0.1dist1.5', 'rank_genes_groups_leiden_individual_res0.1dist1.5', 'leiden_individual_res0.1dist0.75', 'rank_genes_groups_leiden_individual_res0.1dist0.75', 'leiden_individual_res0.1dist0.5', 'rank_genes_groups_leiden_individual_res0.1dist0.5', 'leiden_individual_res0.1dist1', 'rank_genes_groups_leiden_individual_res0.1dist1', 'leiden_individual_res0.1dist0.3', 'rank_genes_groups_leiden_individual_res0.1dist0.3', 'leiden_individual_res0.1dist0.4', 'rank_genes_groups_leiden_individual_res0.1dist0.4', 'leiden_individual_res0.075dist1.5', 'rank_genes_groups_leiden_individual_res0.075dist1.5', 'leiden_individual_res0.075dist0.75', 'rank_genes_groups_leiden_individual_res0.075dist0.75', 'leiden_individual_res0.075dist0.5', 'rank_genes_groups_leiden_individual_res0.075dist0.5', 'leiden_individual_res0.075dist1', 'rank_genes_groups_leiden_individual_res0.075dist1', 'leiden_individual_res0.075dist0.3', 'rank_genes_groups_leiden_individual_res0.075dist0.3', 'leiden_individual_res0.075dist0.4', 'rank_genes_groups_leiden_individual_res0.075dist0.4', 'leiden_individual_res0.15dist1.5', 'rank_genes_groups_leiden_individual_res0.15dist1.5', 'leiden_individual_res0.15dist0.75', 'rank_genes_groups_leiden_individual_res0.15dist0.75', 'leiden_individual_res0.15dist0.5', 'rank_genes_groups_leiden_individual_res0.15dist0.5', 'leiden_individual_res0.15dist1', 'rank_genes_groups_leiden_individual_res0.15dist1', 'leiden_individual_res0.15dist0.3', 'rank_genes_groups_leiden_individual_res0.15dist0.3', 'leiden_individual_res0.15dist0.4', 'rank_genes_groups_leiden_individual_res0.15dist0.4', 'leiden_individual_res0.3dist1.5', 'rank_genes_groups_leiden_individual_res0.3dist1.5'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'log1p', 'scaled'
    obsp: 'distances', 'connectivities'

AAACAGCCAGAAACGT-1    {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'...
AAACAGCCATTAAGTC-1    {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'...
AAACATGCAAATGCCC-1    {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'...
AAACATGCAAGGTATA-1    {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'...
AAACATGCAGGTTACC-1    {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'...
                                            ...                        
TTTGTGGCAACCGCCA-1    {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'...
TTTGTGGCAATCATGT-1    {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'...
TTTGTGGCATTATGCG-1    {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'...
TTTGTGTTCCGGTATG-1    {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTCCTCCAA-1    {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'...
Name: kws_cluster_individual, Length: 3609, dtype: category
Categories (1, object): ['{'resolution': 0.1, 'min_dist': 1.5, 'n_comps...]
AAACAGCCAATTATGC-1    {'resolution': 0.5, 'min_dist': 0.3, 'n_comps'...
AAACAGCCACAATACT-1    {'resolution': 0.5, 'min_dist': 0.3, 'n_comps'...
AAACAGCCAGAACCGA-1    {'resolution': 0.5, 'min_dist': 0.3, 'n_comps'...
AAACAGCCAGGATGGC-1    {'resolution': 0.5, 'min_dist': 0.3, 'n_comps'...
AAACAGCCATCACTTC-1    {'resolution': 0.5, 'min_dist': 0.3, 'n_comps'...
                                            ...                        
TTTGTTGGTACTTCAC-1    {'resolution': 0.5, 'min_dist': 0.3, 'n_comps'...
TTTGTTGGTCATCCTG-1    {'resolution': 0.5, 'min_dist': 0.3, 'n_comps'...
TTTGTTGGTCCGCTGT-1    {'resolution': 0.5, 'min_dist': 0.3, 'n_comps'...
TTTGTTGGTTGTAACG-1    {'resolution': 0.5, 'min_dist': 0.3, 'n_comps'...
TTTGTTGGTTGTCATC-1    {'resolution': 0.5, 'min_dist': 0.3, 'n_comps'...
Name: kws_cluster_individual, Length: 9930, dtype: category
Categories (1, object): ['{'resolution': 0.5, 'min_dist': 0.3, 'n_comps...]
AAACAGCCAAGTTATC-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACAGCCAGCATGGA-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACAGCCATAATCCG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACATGCAGGACCTT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACATGCAGTAATAG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
                                            ...                        
TTTGTGTTCTTTGAGA-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTAAGCACC-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTACGTTTC-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTTAATCGG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTTGAATCG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
Name: kws_cluster_individual, Length: 6826, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1.5, 'n_comps...]
AAACAGCCACAGGATG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACAGCCAGCCAGTT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACAGCCAGGAACAT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACAGCCAGTTATCG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACAGCCATAATCCG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
                                            ...                        
TTTGTGTTCATCCTGC-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTGTTCTAAGGAG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTAGCTGGT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTCGCAATA-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTGACCTGG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
Name: kws_cluster_individual, Length: 6823, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1.5, 'n_comps...]
AAACAGCCAAACCTAT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACAGCCAAGGTACG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACAGCCAATGCGCT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACAGCCAGGAACAT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACAGCCAGTTAGCC-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
                                            ...                        
TTTGTGTTCCTAACGG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTGTTCTCTAGCC-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTATTGTCC-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTGACCTGG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTGTCCAAA-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
Name: kws_cluster_individual, Length: 9550, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1.5, 'n_comps...]
AAACAGCCAAGGGTTG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACAGCCACCCACAG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACAGCCATGATTGT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACAGCCATGTTGCA-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACATGCAAACTAAG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
                                            ...                        
TTTGTTGGTTAAGGTT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTTAATGCG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTTACAAAC-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTTGAATCG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTTTATCGC-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
Name: kws_cluster_individual, Length: 12739, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1.5, 'n_comps...]
AAACAGCCAATGAGGT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACAGCCAGGTTATT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACATGCAGTAGGTG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACCAACAAGCTTTG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACCAACACAATGTT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
                                            ...                        
TTTGTGAAGGGCTTAT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTGGCATTCCTCG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTGATGGCT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTGTTTGCT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTTGGTTCTTTAG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
Name: kws_cluster_individual, Length: 3994, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1.5, 'n_comps...]
AAACATGCAAGGTGCA-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACATGCACCTGGTG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACATGCAGCAACAG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACATGCATACTCCT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACCAACAAGGCCAA-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
                                            ...                        
TTTGTGAAGTGGCGGA-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTGAAGTTTCCGC-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTGGCATGACTAT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTGGCATGTCAAT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTGTTCATAATCG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
Name: kws_cluster_individual, Length: 2924, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1.5, 'n_comps...]
AAACAGCCATGAGTTT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACATGCACCAGCAT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACATGCATCCCTCA-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACCAACAACCTGGT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
AAACCAACAATGAGGT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
                                            ...                        
TTTGTCCCAAATATCC-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTCTAGTTAGTGC-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTGGCACCCACCT-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTGTTCATGCTAA-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
TTTGTGTTCGCTAGTG-1    {'resolution': 0.2, 'min_dist': 1.5, 'n_comps'...
Name: kws_cluster_individual, Length: 2833, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1.5, 'n_comps...]
AAACCAACACAGGGAC-1    {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'...
AAACCGAAGAACCTAC-1    {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'...
AAACCGCGTACGGTAC-1    {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'...
AAACCGCGTCTCAGCC-1    {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'...
AAACCGGCATGCTATG-1    {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'...
                                            ...                        
TTTGTGGCACCTCAGG-1    {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'...
TTTGTGTTCACTCAAA-1    {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'...
TTTGTGTTCATCGTTT-1    {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'...
TTTGTGTTCCGCTAGA-1    {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'...
TTTGTGTTCTGCAAGT-1    {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'...
Name: kws_cluster_individual, Length: 3098, dtype: category
Categories (1, object): ['{'resolution': 0.3, 'min_dist': 1.5, 'n_comps...]
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Concatenate & Integrate¶

If you are concerned about hard drive space, you may want to delete file_concat and files_individual after running this cell.

In [11]:
%%time

# Integration Options
cct_available = kws_cluster is not None and (
    markers_predefined is not None)  # individual annotations available?
kws_vi = {"early_stopping": True,
          "batch_size": 1024,  # raise/lower if more/less than 16 GB VRAM
          "max_epochs": 100,
          "accelerator": "gpu",
          "categorical_covariate_keys": covariates_categorical,
          "continuous_covariate_keys": covariates_continuous,
          "n_latent": 40, "n_hidden": 400}  # scVI/scANVI arguments
if covariates_categorical is not None:
    print(metadata[0][covariates_categorical + [
        col_group]].value_counts().unstack(col_group).replace(
            np.nan, 0).astype(int))
kws_integrate = {
    "col_celltype": cci_scanvi if cct_available else None,
    "flavor": "scanvi",
    # "flavor": "scvi",
    # "flavor": "scanorama",
    # "flavor": "harmony",
    "kws_pp": None, "kws_cluster": None,
    "vars_regress_out": vars_regress_out,
    "max_value": 10, "zero_center": True, "target_sum": 1e4,
    "n_top_genes": n_top_genes,
    "join": join_method, "merge": "same",
    "col_batch": None,  # suppress using batch as covariate
    "drop_non_hvgs": True,  # just for the integration part
    "use_rapids": True,
    "fill_value": np.nan if join_method == "outer" else None,
    "out_file": file_concat, **kws_vi
}

# If scVI/scANVI Integration & Individual Annotations Available
if kws_integrate["flavor"] in ["scvi", "scanvi"] and cct_available is True:
    kws_integrate.update({"unlabeled_category": unlabeled_cat})

# Integrate & Store Integration Parameters in Object
self = scflow.Rna(files_individual, col_sample=col_sample,
                  col_batch=col_batch, kws_integrate=kws_integrate)
self.rna.obs = self.rna.obs.assign(kws_integrate=str(kws_integrate))

# Write Files for Processed/Integrated Objects?
if overwrite is True or not os.path.exists(file_new):
    self.rna.write_h5ad(file_new)

# Display
print(self.rna)
self.rna.obs
>>>Concatenating data...

>>>Re-Normalizing & Finding HVGs for Overall Data...
No description has been provided for this image
>>>Subsetting to top 2000 HVGs...

>>>Integrating with respect to sample (SCANVI)...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
	***Using counts layer for scanvi...
	***Using None, None as covariates...
	***Setting up scVI model: {'n_latent': 40, 'n_hidden': 400}...
	***Traning scVI: {'max_epochs': 100, 'accelerator': 'gpu', 'batch_size': 1024, 'early_stopping': True}...
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Training:   0%|          | 0/100 [00:00<?, ?it/s]
`Trainer.fit` stopped: `max_epochs=100` reached.
	***Setting up scANVI model: {'n_latent': 40, 'n_hidden': 400}...
	***Traning scANVI: {'max_epochs': 100, 'accelerator': 'gpu', 'batch_size': 1024, 'early_stopping': True}...
INFO     Training for 100 epochs.                                                                                  
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Training:   0%|          | 0/100 [00:00<?, ?it/s]
`Trainer.fit` stopped: `max_epochs=100` reached.
... storing 'annotation_scanvi' as categorical
No description has been provided for this image
count    21692.000000
mean      4589.334686
std       7441.397028
min         34.000000
25%        369.000000
50%       1510.500000
75%       5308.500000
max      62323.000000
Name: n_cells_by_counts, dtype: float64
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
... storing 'kws_integrate' as categorical
AnnData object with n_obs × n_vars = 62326 × 21692
    obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Animal Source]', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Characteristics[Material Type]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Spaceflight]', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Age at Launch]', 'Unit', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[Enrichment material]', 'Parameter Value[light cycle]', 'Parameter Value[Diet]', 'Parameter Value[Feeding Schedule]', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Carcass Preservation Method]', 'Parameter Value[Body Weight at Euthanasia]', 'Unit.3', 'Term Source REF.9', 'Term Accession Number.9', 'Protocol REF.1', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.10', 'Term Accession Number.10', 'Parameter Value[Sample Storage Temperature]', 'Unit.4', 'Term Source REF.11', 'Term Accession Number.11', 'Comment[RFID]', 'Comment[ALSDA Subject ID]', 'Comment[Euthanasia Date]', 'Comment[Euthanasia Time (hh:mm:ss)]', 'Comment[BSP Dissection Date]', 'Comment[Source Description]', 'Comment[Partial Body Weight on BSP Dissection Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual', 'annotation_scanvi', 'kws_integrate'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
    uns: 'log1p', 'hvg', 'sample_colors'
    obsm: 'X_pca', 'X_umap', 'X_scANVI', 'X_pca_old'
    layers: 'counts', 'log1p', 'scaled'
CPU times: user 3min 37s, sys: 2min 1s, total: 5min 39s
Wall time: 4min 56s
Out[11]:
Group sample Characteristics[Organism] Term Source REF Term Accession Number Characteristics[Strain] Term Source REF.1 Term Accession Number.1 Characteristics[Animal Source] Characteristics[Genotype] Term Source REF.2 Term Accession Number.2 Characteristics[Sex] Term Source REF.3 Term Accession Number.3 Characteristics[Material Type] Term Source REF.4 Term Accession Number.4 Factor Value[Spaceflight] Term Source REF.5 Term Accession Number.5 Characteristics[Age at Launch] Unit Term Source REF.6 Term Accession Number.6 Characteristics[Age at Euthanasia] Unit.1 Term Source REF.7 Term Accession Number.7 Protocol REF Parameter Value[habitat] Parameter Value[duration] Unit.2 Term Source REF.8 Term Accession Number.8 Parameter Value[Enrichment material] Parameter Value[light cycle] Parameter Value[Diet] Parameter Value[Feeding Schedule] Parameter Value[Euthanasia Method] Parameter Value[Carcass Preservation Method] Parameter Value[Body Weight at Euthanasia] Unit.3 Term Source REF.9 Term Accession Number.9 Protocol REF.1 Parameter Value[Sample Preservation Method] Term Source REF.10 Term Accession Number.10 Parameter Value[Sample Storage Temperature] Unit.4 Term Source REF.11 Term Accession Number.11 Comment[RFID] Comment[ALSDA Subject ID] Comment[Euthanasia Date] Comment[Euthanasia Time (hh:mm:ss)] Comment[BSP Dissection Date] Comment[Source Description] Comment[Partial Body Weight on BSP Dissection Date] n_cells_original_sample kws_pp_sample n_genes_by_counts total_counts log1p_n_genes_by_counts log1p_total_counts total_counts_mt pct_counts_mt log1p_total_counts_mt total_counts_ribo pct_counts_ribo log1p_total_counts_ribo total_counts_hb pct_counts_hb log1p_total_counts_hb n_counts n_genes doublet_score predicted_doublet annotation_by_markers_individual_heterogeneous_collapsed leiden_individual annotation_by_markers_individual kws_cluster_individual annotation_scanvi kws_integrate
AAACAGCCAGAAACGT-1_RR10_BRN_GC_WT_G5 Ground Control RR10_BRN_GC_WT_G5 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... B6129SF2/J OSD https://osdr.nasa.gov/ Jackson Laboratory Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Female MESH http://purl.bioontology.org/ontology/MESH/D005260 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819 Ground Control OSD https://osdr.nasa.gov/ 14 - 15 week UO http://purl.obolibrary.org/obo/UO_0000034 18 -19 week UO http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) 28 day UO http://purl.obolibrary.org/obo/UO_0000033 Cocoons 12 h light/dark cycle, lights on at 7:00 GMT Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum Bilateral thoracotomy with sedation, Ketamine/... Cryochiller 23.68 gram UO http://purl.obolibrary.org/obo/UO_0000021 sample collection Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius UO http://purl.obolibrary.org/obo/UO_0000027 6E3C500B24 800 05-Jan-2021 06:18:00 EST 14-May-2021 Frozen carcass dissected on Earth post-flight Not Available 4231 {'min_max_genes': [336, None], 'min_max_cells'... 2730 3351.682373 7.912423 8.117516 21.065426 0.628503 3.094012 26.337223 0.785791 3.308249 0.000000 0.000000 0.000000 7117.0 2742 0.034528 False Excitatory 2 Excitatory {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'... Excitatory {'col_celltype': 'annotation_by_markers_indivi...
AAACAGCCATTAAGTC-1_RR10_BRN_GC_WT_G5 Ground Control RR10_BRN_GC_WT_G5 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... B6129SF2/J OSD https://osdr.nasa.gov/ Jackson Laboratory Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Female MESH http://purl.bioontology.org/ontology/MESH/D005260 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819 Ground Control OSD https://osdr.nasa.gov/ 14 - 15 week UO http://purl.obolibrary.org/obo/UO_0000034 18 -19 week UO http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) 28 day UO http://purl.obolibrary.org/obo/UO_0000033 Cocoons 12 h light/dark cycle, lights on at 7:00 GMT Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum Bilateral thoracotomy with sedation, Ketamine/... Cryochiller 23.68 gram UO http://purl.obolibrary.org/obo/UO_0000021 sample collection Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius UO http://purl.obolibrary.org/obo/UO_0000027 6E3C500B24 800 05-Jan-2021 06:18:00 EST 14-May-2021 Frozen carcass dissected on Earth post-flight Not Available 4231 {'min_max_genes': [336, None], 'min_max_cells'... 2237 3220.274658 7.713338 8.077533 8.841228 0.274549 2.286581 16.175243 0.502294 2.843469 0.000000 0.000000 0.000000 4863.0 2246 0.059578 False Excitatory 0 Excitatory {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'... Excitatory {'col_celltype': 'annotation_by_markers_indivi...
AAACATGCAAATGCCC-1_RR10_BRN_GC_WT_G5 Ground Control RR10_BRN_GC_WT_G5 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... B6129SF2/J OSD https://osdr.nasa.gov/ Jackson Laboratory Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Female MESH http://purl.bioontology.org/ontology/MESH/D005260 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819 Ground Control OSD https://osdr.nasa.gov/ 14 - 15 week UO http://purl.obolibrary.org/obo/UO_0000034 18 -19 week UO http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) 28 day UO http://purl.obolibrary.org/obo/UO_0000033 Cocoons 12 h light/dark cycle, lights on at 7:00 GMT Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum Bilateral thoracotomy with sedation, Ketamine/... Cryochiller 23.68 gram UO http://purl.obolibrary.org/obo/UO_0000021 sample collection Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius UO http://purl.obolibrary.org/obo/UO_0000027 6E3C500B24 800 05-Jan-2021 06:18:00 EST 14-May-2021 Frozen carcass dissected on Earth post-flight Not Available 4231 {'min_max_genes': [336, None], 'min_max_cells'... 2345 3278.046875 7.760467 8.095308 12.397192 0.378188 2.595045 23.126621 0.705500 3.183316 0.000000 0.000000 0.000000 4991.0 2353 0.086538 False Excitatory 0 Excitatory {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'... Excitatory {'col_celltype': 'annotation_by_markers_indivi...
AAACATGCAAGGTATA-1_RR10_BRN_GC_WT_G5 Ground Control RR10_BRN_GC_WT_G5 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... B6129SF2/J OSD https://osdr.nasa.gov/ Jackson Laboratory Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Female MESH http://purl.bioontology.org/ontology/MESH/D005260 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819 Ground Control OSD https://osdr.nasa.gov/ 14 - 15 week UO http://purl.obolibrary.org/obo/UO_0000034 18 -19 week UO http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) 28 day UO http://purl.obolibrary.org/obo/UO_0000033 Cocoons 12 h light/dark cycle, lights on at 7:00 GMT Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum Bilateral thoracotomy with sedation, Ketamine/... Cryochiller 23.68 gram UO http://purl.obolibrary.org/obo/UO_0000021 sample collection Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius UO http://purl.obolibrary.org/obo/UO_0000027 6E3C500B24 800 05-Jan-2021 06:18:00 EST 14-May-2021 Frozen carcass dissected on Earth post-flight Not Available 4231 {'min_max_genes': [336, None], 'min_max_cells'... 2717 3500.315674 7.907652 8.160894 6.348201 0.181361 1.994455 28.441347 0.812537 3.382400 0.964261 0.027548 0.675116 6176.0 2731 0.119011 False Excitatory 0 Excitatory {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'... Excitatory {'col_celltype': 'annotation_by_markers_indivi...
AAACATGCAGGTTACC-1_RR10_BRN_GC_WT_G5 Ground Control RR10_BRN_GC_WT_G5 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... B6129SF2/J OSD https://osdr.nasa.gov/ Jackson Laboratory Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Female MESH http://purl.bioontology.org/ontology/MESH/D005260 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819 Ground Control OSD https://osdr.nasa.gov/ 14 - 15 week UO http://purl.obolibrary.org/obo/UO_0000034 18 -19 week UO http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) 28 day UO http://purl.obolibrary.org/obo/UO_0000033 Cocoons 12 h light/dark cycle, lights on at 7:00 GMT Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum Bilateral thoracotomy with sedation, Ketamine/... Cryochiller 23.68 gram UO http://purl.obolibrary.org/obo/UO_0000021 sample collection Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius UO http://purl.obolibrary.org/obo/UO_0000027 6E3C500B24 800 05-Jan-2021 06:18:00 EST 14-May-2021 Frozen carcass dissected on Earth post-flight Not Available 4231 {'min_max_genes': [336, None], 'min_max_cells'... 1376 2542.810547 7.227662 7.841418 10.975826 0.431642 2.482890 17.603149 0.692271 2.923331 1.580019 0.062137 0.947797 2599.0 1381 0.065880 False Inhibitory 5 Inhibitory {'resolution': 0.1, 'min_dist': 1.5, 'n_comps'... Inhibitory {'col_celltype': 'annotation_by_markers_indivi...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
TTTGTGGCACCTCAGG-1_RR10_BRN_GC_WT_G7 Ground Control RR10_BRN_GC_WT_G7 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... B6129SF2/J OSD https://osdr.nasa.gov/ Jackson Laboratory Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Female MESH http://purl.bioontology.org/ontology/MESH/D005260 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819 Ground Control OSD https://osdr.nasa.gov/ 14 - 15 week UO http://purl.obolibrary.org/obo/UO_0000034 18 -19 week UO http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) 28 day UO http://purl.obolibrary.org/obo/UO_0000033 Cocoons 12 h light/dark cycle, lights on at 7:00 GMT Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum Bilateral thoracotomy with sedation, Ketamine/... Cryochiller 21.28 gram UO http://purl.obolibrary.org/obo/UO_0000021 sample collection Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius UO http://purl.obolibrary.org/obo/UO_0000027 6E37071254 800 05-Jan-2021 08:26:00 EST 14-May-2021 Frozen carcass dissected on Earth post-flight Not Available 3926 {'min_max_genes': [331, None], 'min_max_cells'... 2022 3000.386230 7.612337 8.006829 7.447003 0.248201 2.133812 21.830278 0.727582 3.128088 0.000000 0.000000 0.000000 4570.0 2032 0.006627 False Excitatory 3 Excitatory {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'... Excitatory {'col_celltype': 'annotation_by_markers_indivi...
TTTGTGTTCACTCAAA-1_RR10_BRN_GC_WT_G7 Ground Control RR10_BRN_GC_WT_G7 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... B6129SF2/J OSD https://osdr.nasa.gov/ Jackson Laboratory Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Female MESH http://purl.bioontology.org/ontology/MESH/D005260 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819 Ground Control OSD https://osdr.nasa.gov/ 14 - 15 week UO http://purl.obolibrary.org/obo/UO_0000034 18 -19 week UO http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) 28 day UO http://purl.obolibrary.org/obo/UO_0000033 Cocoons 12 h light/dark cycle, lights on at 7:00 GMT Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum Bilateral thoracotomy with sedation, Ketamine/... Cryochiller 21.28 gram UO http://purl.obolibrary.org/obo/UO_0000021 sample collection Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius UO http://purl.obolibrary.org/obo/UO_0000027 6E37071254 800 05-Jan-2021 08:26:00 EST 14-May-2021 Frozen carcass dissected on Earth post-flight Not Available 3926 {'min_max_genes': [331, None], 'min_max_cells'... 515 1493.017090 6.246107 7.309224 12.003338 0.803965 2.565206 21.173267 1.418153 3.098887 0.000000 0.000000 0.000000 697.0 523 0.063116 False Pericyte 5 Pericyte {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'... Pericyte {'col_celltype': 'annotation_by_markers_indivi...
TTTGTGTTCATCGTTT-1_RR10_BRN_GC_WT_G7 Ground Control RR10_BRN_GC_WT_G7 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... B6129SF2/J OSD https://osdr.nasa.gov/ Jackson Laboratory Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Female MESH http://purl.bioontology.org/ontology/MESH/D005260 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819 Ground Control OSD https://osdr.nasa.gov/ 14 - 15 week UO http://purl.obolibrary.org/obo/UO_0000034 18 -19 week UO http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) 28 day UO http://purl.obolibrary.org/obo/UO_0000033 Cocoons 12 h light/dark cycle, lights on at 7:00 GMT Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum Bilateral thoracotomy with sedation, Ketamine/... Cryochiller 21.28 gram UO http://purl.obolibrary.org/obo/UO_0000021 sample collection Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius UO http://purl.obolibrary.org/obo/UO_0000027 6E37071254 800 05-Jan-2021 08:26:00 EST 14-May-2021 Frozen carcass dissected on Earth post-flight Not Available 3926 {'min_max_genes': [331, None], 'min_max_cells'... 2202 3061.453125 7.697575 8.026972 4.101108 0.133960 1.629458 21.009678 0.686265 3.091482 0.000000 0.000000 0.000000 5134.0 2214 0.017561 False Excitatory 3 Excitatory {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'... Excitatory {'col_celltype': 'annotation_by_markers_indivi...
TTTGTGTTCCGCTAGA-1_RR10_BRN_GC_WT_G7 Ground Control RR10_BRN_GC_WT_G7 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... B6129SF2/J OSD https://osdr.nasa.gov/ Jackson Laboratory Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Female MESH http://purl.bioontology.org/ontology/MESH/D005260 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819 Ground Control OSD https://osdr.nasa.gov/ 14 - 15 week UO http://purl.obolibrary.org/obo/UO_0000034 18 -19 week UO http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) 28 day UO http://purl.obolibrary.org/obo/UO_0000033 Cocoons 12 h light/dark cycle, lights on at 7:00 GMT Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum Bilateral thoracotomy with sedation, Ketamine/... Cryochiller 21.28 gram UO http://purl.obolibrary.org/obo/UO_0000021 sample collection Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius UO http://purl.obolibrary.org/obo/UO_0000027 6E37071254 800 05-Jan-2021 08:26:00 EST 14-May-2021 Frozen carcass dissected on Earth post-flight Not Available 3926 {'min_max_genes': [331, None], 'min_max_cells'... 2257 3195.966797 7.722235 8.069958 23.734081 0.742626 3.208182 39.391556 1.232540 3.698621 0.000000 0.000000 0.000000 4870.0 2275 0.028606 False Excitatory 11 Excitatory {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'... Excitatory {'col_celltype': 'annotation_by_markers_indivi...
TTTGTGTTCTGCAAGT-1_RR10_BRN_GC_WT_G7 Ground Control RR10_BRN_GC_WT_G7 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... B6129SF2/J OSD https://osdr.nasa.gov/ Jackson Laboratory Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Female MESH http://purl.bioontology.org/ontology/MESH/D005260 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819 Ground Control OSD https://osdr.nasa.gov/ 14 - 15 week UO http://purl.obolibrary.org/obo/UO_0000034 18 -19 week UO http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) 28 day UO http://purl.obolibrary.org/obo/UO_0000033 Cocoons 12 h light/dark cycle, lights on at 7:00 GMT Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum Bilateral thoracotomy with sedation, Ketamine/... Cryochiller 21.28 gram UO http://purl.obolibrary.org/obo/UO_0000021 sample collection Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius UO http://purl.obolibrary.org/obo/UO_0000027 6E37071254 800 05-Jan-2021 08:26:00 EST 14-May-2021 Frozen carcass dissected on Earth post-flight Not Available 3926 {'min_max_genes': [331, None], 'min_max_cells'... 1470 2666.808594 7.293698 7.889013 12.968199 0.486282 2.636783 24.679865 0.925446 3.245707 0.000000 0.000000 0.000000 2650.0 1484 0.008061 False Astrocyte 8 Astrocyte {'resolution': 0.3, 'min_dist': 1.5, 'n_comps'... Astrocyte {'col_celltype': 'annotation_by_markers_indivi...

62326 rows × 85 columns

Benchmark Integration¶

In [ ]:
# if kws_integrate["flavor"] == "scanvi":
#     try:
#         results_integration_benchmark = scflow.pp.benchmark_integration(
#             self.rna, col_sample, col_celltype="annotation_scanvi")
#     except Exception as err:
#         print(err)

Clustering¶

Perform PCA, UMAP embedding, and Leiden clustering on the integrated object

Marker gene-related code looks at top markers by log2fold-change and adjusted p-value cutoffs and sorts by adjusted p-values. Plots for predefined marker expression by cluster (if available) and cluster DEGs are created. Use kind = ["heat", "dot"] to get dot plots too.

In [31]:
%%time

# Clustering Options
resolution, min_dist = 0.07, 2
# resolution, min_dist = 0.015, 1.5
# resolution, min_dist = 0.022, 1.5
# resolution, min_dist = 0.025, 1
# resolution, min_dist = 0.027, 1.3
# resolution, min_dist = 0.012, 1.5
# resolution, min_dist = 0.027, 1.3
# resolution, min_dist = 0.035, 1.3
# resolution, min_dist = 0.02, 0.6
# resolution, min_dist = 0.018, 0.7
# resolution, min_dist = 0.012, 0.5
# resolution, min_dist = 0.0115, 1.5
# resolution, min_dist = 0.011, 0.5
# n_neighbors = 100
# n_neighbors = 20
n_neighbors = 80
cct = "leiden"  # key added/column name

# Set Default Cell Type Column
col_celltype = cct
self._info["col_celltype"] = col_celltype

# Clustering
self.cluster(col_celltype=cct, resolution=resolution, min_dist=min_dist,
             kws_pca=False, layer="scaled",
             kws_neighbors=dict(n_neighbors=n_neighbors))  # cluster
self.rna.obs = self.rna.obs.assign(**{
       f"{cct}_resolution": resolution}).assign(
              **{f"{cct}_n_neighbors": n_neighbors}).assign(
                     **{f"{cct}_min_dist": min_dist})  # store parameters
_ = self.plot(kind="umap", wspace=0.5, palette="tab20",
              color=[col_batch, "leiden", col_sample])  # plot UMAP
print(self.rna.obs["leiden"].value_counts().to_frame("n_cells"))  # N/cluster
print(self.rna.obs.groupby(col_sample).apply(lambda x: x[
       cct].value_counts(), include_groups=False).unstack(1))  # by group

# DEGs (One Cluster versus All)
self.find_markers(col_celltype=cct)  # DEGs by cluster
markers_df = self.get_markers_df(
    n_genes=15, col_celltype=cct,
    p_threshold=1e-10, log2fc_threshold=1.5, log2fc_threshold_abs=False)
markers_dict = dict(markers_df.groupby(cct).apply(
    lambda x: list(x.reset_index().names)))  # dictionary version of df
_ = self.plot(genes=markers_dict, figsize=(15, 15),
              layer="scaled", standard_scale="obs", kind="heat")
if markers_predefined is not None:
    mks_c = dict(zip(markers_predefined, [markers_predefined[x].intersection(
           self.rna.var_names) for x in markers_predefined]))
    mks_c["Inhibitory"] = mks_c["Inhibitory"].difference(mks_c["Excitatory"])
    mks_c["Excitatory"] = mks_c["Excitatory"].difference(mks_c["Inhibitory"])
    _ = self.plot(genes=mks_c, figsize=(15, 15), vmax=0.7,
                  layer="scaled", standard_scale="obs", kind="heat")
markers_df
	***Building neighborhood with 80 neighbors...
	***Embedding UMAP with minimum distance 2...
	***Performing Leiden clustering with resolution 0.07...
        n_cells
leiden         
0         28057
3         10182
4          6475
1          5940
5          4602
6          3056
2          2846
7          1168
leiden                 0     1    2     3     4    5    6    7
sample                                                        
RR10_BRN_GC_WT_G5   1312   329  259  1073    17  259  240  120
RR10_BRN_FLT_WT_F1  3841  1333  343  1132  1917  855  387  122
RR10_BRN_GC_WT_G1   3722   456  277  1337     0  464  457  113
RR10_BRN_GC_WT_G9   2846   441  620  1717    32  449  520  198
RR10_BRN_FLT_WT_F3  5571   716  724  1278    38  596  422  205
RR10_BRN_GC_WT_G3   5949  1111  271  1016  3149  779  329  135
RR10_BRN_FLT_WT_F7  1817   586  199   657    53  392  215   75
RR10_BRN_FLT_WT_F9  1082   254   92   877   134  284   91  110
RR10_BRN_FLT_WT_F5   835   427   28   599   424  265  207   48
RR10_BRN_GC_WT_G7   1082   287   33   496   711  259  188   42
No description has been provided for this image
No description has been provided for this image
WARNING: Gene labels are not shown when more than 50 genes are visualized. To show gene labels set `show_gene_labels=True`
No description has been provided for this image
WARNING: Gene labels are not shown when more than 50 genes are visualized. To show gene labels set `show_gene_labels=True`
No description has been provided for this image
CPU times: user 21.8 s, sys: 1.82 s, total: 23.6 s
Wall time: 9.18 s
Out[31]:
scores logfoldchanges pvals pvals_adj
leiden names
0 Dpp6 115.732620 1.922254 0.000000e+00 0.000000e+00
Nxph1 110.448204 4.304471 0.000000e+00 0.000000e+00
Inpp4b 105.794746 2.575222 0.000000e+00 0.000000e+00
Tenm1 89.112740 1.756814 0.000000e+00 0.000000e+00
Adarb2 88.722931 2.732858 0.000000e+00 0.000000e+00
Zfp521 43.008194 1.591796 0.000000e+00 0.000000e+00
Zic1 42.607590 3.144535 0.000000e+00 0.000000e+00
St8sia4 41.124001 2.063038 0.000000e+00 0.000000e+00
Ankfn1 39.430374 1.889275 0.000000e+00 0.000000e+00
Tmem255a 39.406651 2.316693 0.000000e+00 0.000000e+00
Lef1 39.224106 3.134523 0.000000e+00 0.000000e+00
Gm45341 38.945236 2.151298 0.000000e+00 0.000000e+00
Insyn2b 38.894066 2.948663 0.000000e+00 0.000000e+00
Dock11 38.471752 1.578298 0.000000e+00 0.000000e+00
Col25a1 59.337337 1.831207 0.000000e+00 0.000000e+00
3 Ptprd 204.621841 2.562793 0.000000e+00 0.000000e+00
Kcnq5 186.737274 2.981804 0.000000e+00 0.000000e+00
Nrg1 179.353333 2.920468 0.000000e+00 0.000000e+00
Nrg3 175.936340 2.308790 0.000000e+00 0.000000e+00
Csmd1 174.603271 2.145468 0.000000e+00 0.000000e+00
Celf2 167.896011 2.210736 0.000000e+00 0.000000e+00
Lingo2 167.705826 2.885069 0.000000e+00 0.000000e+00
Nav3 165.544708 2.181725 0.000000e+00 0.000000e+00
Kcnh7 164.178696 2.971062 0.000000e+00 0.000000e+00
Mef2c 164.118423 2.983392 0.000000e+00 0.000000e+00
Kalrn 162.735413 2.160930 0.000000e+00 0.000000e+00
Dpp10 158.968414 3.387660 0.000000e+00 0.000000e+00
Kctd16 118.807434 2.200214 0.000000e+00 0.000000e+00
Meg3 115.624359 1.524340 0.000000e+00 0.000000e+00
Mlip 94.051727 2.988306 0.000000e+00 0.000000e+00
7 Erbb4 158.453613 5.175955 0.000000e+00 0.000000e+00
Grip1 95.315460 4.153377 0.000000e+00 0.000000e+00
Adarb2 94.122925 6.116269 0.000000e+00 0.000000e+00
Tcf4 88.968124 2.534769 0.000000e+00 0.000000e+00
Snhg11 79.849396 1.794310 0.000000e+00 0.000000e+00
Meg3 78.512436 1.601473 0.000000e+00 0.000000e+00
Galntl6 68.108284 3.983751 0.000000e+00 0.000000e+00
Dlx6os1 68.018387 5.579965 0.000000e+00 0.000000e+00
Zfp536 52.880795 3.409306 5.609284e-317 4.345593e-314
Cntnap2 51.291580 2.383930 4.533505e-313 3.391061e-310
Nrg3 43.308628 1.546490 3.549351e-261 1.974167e-258
Sox2ot 44.444321 3.061857 8.076119e-257 4.272858e-254
Sntg1 41.728512 1.914941 1.670851e-238 6.970018e-236
Slc2a13 41.869816 2.449387 1.477733e-237 5.828181e-235
Zmat4 40.749634 2.873551 1.834914e-228 6.525074e-226
5 Gpc5 191.311218 6.749896 0.000000e+00 0.000000e+00
Slc1a2 158.136795 5.445472 0.000000e+00 0.000000e+00
Kcnn2 52.715080 2.271234 0.000000e+00 0.000000e+00
Nwd1 52.641029 5.657287 0.000000e+00 0.000000e+00
Phka1 52.534924 3.769710 0.000000e+00 0.000000e+00
Macf1 51.146343 1.702287 0.000000e+00 0.000000e+00
F3 51.108582 5.415951 0.000000e+00 0.000000e+00
Rgs20 50.708038 3.179070 0.000000e+00 0.000000e+00
Slc6a11 50.149548 4.892387 0.000000e+00 0.000000e+00
Pla2g7 49.924992 4.934491 0.000000e+00 0.000000e+00
Mt1 49.288948 3.417319 0.000000e+00 0.000000e+00
Bcan 49.166378 4.112677 0.000000e+00 0.000000e+00
Gm20713 48.207859 4.497130 0.000000e+00 0.000000e+00
Rmst 47.923149 2.810101 0.000000e+00 0.000000e+00
Appl2 47.763943 3.331866 0.000000e+00 0.000000e+00
2 Celf2 140.025757 1.843905 0.000000e+00 0.000000e+00
Nrg3 128.191055 1.955656 0.000000e+00 0.000000e+00
Grin2a 118.028183 2.536846 0.000000e+00 0.000000e+00
Csmd1 111.173851 1.603665 0.000000e+00 0.000000e+00
Grm5 110.561127 1.936699 0.000000e+00 0.000000e+00
Meg3 110.402870 1.667266 0.000000e+00 0.000000e+00
Epha6 83.489983 2.999736 0.000000e+00 0.000000e+00
Pde1a 81.820412 2.680102 0.000000e+00 0.000000e+00
Tafa1 81.360863 3.866324 0.000000e+00 0.000000e+00
Tenm2 79.103287 1.967414 0.000000e+00 0.000000e+00
Nav3 78.408066 1.516752 0.000000e+00 0.000000e+00
Gm20754 60.253918 2.426447 0.000000e+00 0.000000e+00
Cttnbp2 60.196331 1.744821 0.000000e+00 0.000000e+00
Shisa6 60.036594 3.209508 0.000000e+00 0.000000e+00
Gria3 59.887257 1.714430 0.000000e+00 0.000000e+00
1 Plp1 412.365082 7.318904 0.000000e+00 0.000000e+00
Pde4b 232.140137 4.342822 0.000000e+00 0.000000e+00
Mbp 206.797119 5.418245 0.000000e+00 0.000000e+00
Nkain2 202.465622 3.081276 0.000000e+00 0.000000e+00
Plcl1 186.147675 4.822429 0.000000e+00 0.000000e+00
Pcdh9 184.831909 2.371376 0.000000e+00 0.000000e+00
St18 183.953705 6.560849 0.000000e+00 0.000000e+00
Cdk19 66.729935 3.306992 0.000000e+00 0.000000e+00
Cdc37l1 66.473412 2.961321 0.000000e+00 0.000000e+00
Erbb4 64.296539 2.400096 0.000000e+00 0.000000e+00
Cldn11 64.223267 5.166214 0.000000e+00 0.000000e+00
Tbc1d5 63.344940 2.439339 0.000000e+00 0.000000e+00
Aspa 62.900658 5.280020 0.000000e+00 0.000000e+00
Apod 62.140999 4.926096 0.000000e+00 0.000000e+00
Gm16168 61.738800 4.230247 0.000000e+00 0.000000e+00
6 Hs3st4 208.010193 4.877477 0.000000e+00 0.000000e+00
Nrg3 165.524002 2.407715 0.000000e+00 0.000000e+00
Ptprd 160.860458 2.074726 0.000000e+00 0.000000e+00
Etl4 149.539642 3.490088 0.000000e+00 0.000000e+00
Cdh18 125.096443 3.583939 0.000000e+00 0.000000e+00
Csmd1 120.169815 1.799085 0.000000e+00 0.000000e+00
Nav3 115.913101 2.040621 0.000000e+00 0.000000e+00
Nalf1 112.029533 1.917980 0.000000e+00 0.000000e+00
Frmpd4 104.698502 2.777440 0.000000e+00 0.000000e+00
Syt1 101.681335 1.767520 0.000000e+00 0.000000e+00
Prickle1 69.170570 2.165214 0.000000e+00 0.000000e+00
Garnl3 68.942566 3.106019 0.000000e+00 0.000000e+00
Slc8a1 67.668358 1.795062 0.000000e+00 0.000000e+00
Fut9 66.745811 2.127027 0.000000e+00 0.000000e+00
Ryr2 66.688759 1.769109 0.000000e+00 0.000000e+00
4 Phactr1 170.293198 3.297709 0.000000e+00 0.000000e+00
Cacna2d3 161.046173 3.269646 0.000000e+00 0.000000e+00
Rarb 156.014038 5.616486 0.000000e+00 0.000000e+00
Ryr3 134.184540 3.132653 0.000000e+00 0.000000e+00
Dgkb 123.956688 2.777103 0.000000e+00 0.000000e+00
Rgs9 122.611969 5.302050 0.000000e+00 0.000000e+00
Grm5 119.174614 2.292377 0.000000e+00 0.000000e+00
Celf2 117.857468 2.183239 0.000000e+00 0.000000e+00
Meis2 116.666595 3.646053 0.000000e+00 0.000000e+00
Kcnq5 116.074730 2.755988 0.000000e+00 0.000000e+00
Pde10a 114.707039 2.759833 0.000000e+00 0.000000e+00
Sgcz 111.795059 2.839822 0.000000e+00 0.000000e+00
Adcy5 109.838974 4.025107 0.000000e+00 0.000000e+00
Osbpl8 73.052383 2.243474 0.000000e+00 0.000000e+00
Elmod1 72.750404 2.141031 0.000000e+00 0.000000e+00

Sub-Clustering¶

Sub-cluster the biggest <subcluster_biggest> Leiden clusters?

For instance if subcluster_biggest = 3, take top three most abundant (highest cell count) clusters and sub-cluster them.

In [ ]:
resolution_sub = None if subcluster_biggest is False else 0.005

if subcluster_biggest is not False:

    # Sub-Clustering
    col_celltype = "leiden_subcluster"
    biggest_clusters = self.rna.obs["leiden"].value_counts().index.values[
        :min(len(self.rna.obs["leiden"].unique()), subcluster_biggest)]
    self.rna.obs.loc[:, col_celltype] = self.rna.obs.leiden.copy()
    for x in biggest_clusters:
        sc.tl.leiden(self.rna, restrict_to=(col_celltype, [x]),
                     key_added=col_celltype, resolution=resolution_sub,
                     seed=0, n_iterations=-1, use_weights=True)
    self.find_markers(col_celltype=col_celltype)  # DEGs by cluster
    self.plot(kind="umap", color=col_celltype)
    print(self.rna.obs[[col_sample, col_celltype]].groupby(
        col_sample).value_counts().unstack(1))
    self.rna.obs.loc[:, f"resolution_{col_celltype}"] = resolution_sub
    self._info["col_celltype"] = col_celltype

    # Marker Plots
    if markers_predefined is not None:
        mks_c = dict(zip(markers_predefined, [markers_predefined[
            x].intersection(self.rna.var_names) for x in markers_predefined]))
        _ = self.plot(genes=mks_c, figsize=(15, 15), vmax=0.7,
                      layer="scaled", standard_scale="obs", kind="heat")
    markers_df = self.get_markers_df(
        n_genes=15, col_celltype=cct,
        p_threshold=1e-10, log2fc_threshold=1.5, log2fc_threshold_abs=False)
    markers_dict = dict(markers_df.groupby(cct).apply(
        lambda x: list(x.reset_index().names)))  # dictionary version of df
    _ = self.plot(genes=markers_dict, figsize=(15, 15),
                  layer="scaled", standard_scale="obs", kind="heat")
    print(markers_df)

Annotate¶

Annotate cell types with various methods

Annotate by Marker Gene Overlap¶

File from https://github.com/nasa/GeneLab_Data_Processing/blob/master/scRNAseq/10X_Chromium_3prime_Data/GeneLab_CellType_GeneMarkers/GL-DPPD-7111_GeneMarker_Files/GL-DPPD-7111_Mmus_Brain_CellType_GeneMarkers.csv

Example of the Expected Marker Definition Format

markers_predefined = {
    "CD4 T cells": {"IL7R"},
    "CD14+ Monocytes": {"CD14", "LYZ"},
    "B cells": {"MS4A1"},
    "CD8 T cells": {"CD8A"},
    "NK cells": {"GNLY", "NKG7"},
    "FCGR3A+ Monocytes": {"FCGR3A", "MS4A7"},
    "Dendritic Cells": {"FCER1A", "CST3"},
    "Megakaryocytes": {"PPBP"},
}
In [32]:
# Plot
_ = self.plot(genes=mks_c, figsize=(15, 15),
              layer="scaled", standard_scale="var", kind="matrix")

# Annotate
marker_matches = self.annotate(
    mks_collapsed,
    # celltypes_superhierarchical=celltypes_superhierarchical,
    col_celltype=col_celltype, col_celltype_new="annotation_by_overlap",
    top_n_markers=50,  # can only have this one or `adj_pval_threshold`
    # adj_pval_threshold=1e-10,
    # method="overlap_count",
    method="overlap_coef",
    # method="jaccard",
    overwrite=True)

# Plot Predefined Marker Expression
# cct = col_celltype
# # cct = "annotation_by_markers_individual"
# mks_col = dict(zip(self.rna.obs[cct].cat.categories, [list(
#     mks_collapsed[x].intersection(self.rna.var_names)
#     ) for x in self.rna.obs[cct].cat.categories]))
# _ = self.plot(genes=mks_col, figsize=(15, 15), col_celltype=cct,
#               layer="scaled", standard_scale="obs", kind="heat")

# Rename Any Labels As Pre-Specified
if rename_marker_based_annotation is not None:
    self.rna.obs.loc[:, "annotation_by_overlap"] = self.rna.obs[
        "annotation_by_overlap"].replace(rename_marker_based_annotation)

# Print & Plot Results
self.plot(kind="umap", color="annotation_by_overlap", wspace=0.4)
print(round(self.rna.obs["annotation_by_overlap"].value_counts(
    normalize=True) * 100, 2))
round(self.rna.obs[[col_celltype, "annotation_by_overlap"]
                   ].value_counts(normalize=True)* 100, 2).sort_values()
No description has been provided for this image
annotation_by_overlap
OPC                49.92
Neuron             33.17
Oligodendrocyte     9.53
Astrocyte           7.38
Name: proportion, dtype: float64
Out[32]:
leiden  annotation_by_overlap
7       Neuron                    1.87
2       Neuron                    4.57
6       OPC                       4.90
5       Astrocyte                 7.38
1       Oligodendrocyte           9.53
4       Neuron                   10.39
3       Neuron                   16.34
0       OPC                      45.02
Name: proportion, dtype: float64
No description has been provided for this image

Annotate with ToppGene¶

In [36]:
# Options
min_genes = 2  # minimum markers that have to overlap between Leiden & atlas
remove_strings = ["----L1-6", # "---[|]M.*",
                  "facs-", "-nan-",
                  # "-i_Gaba_3-.*",
                  "Brain_organoid-organoid_Kanton_Nature-Organoid-..-",
                  # "Non-neuronal-Macroglial-((^|)(Oligo|Astro))+-",
                  # "-Glut_E.*IL7R",
                  "cells hierarchy compared to all cells using T-S.*",
                  ".*-organoid_Tanaka_cellReport-.+-",
                  "...BrainAtlas -.*", "-eN2.*", "...Sample groups.*",
                  "...Sample Type, Dataset.*",
                  "-Neuronal",
                  " // Primary Cells by Cluster",
                  ".World...Primary Cells by Cluster",
                  "Brain_organoid-organoid_Velasco_nature-6_",
                  "Fetal_brain-fetalBrain_Zhong_nature-....-",
                  "Somatosensory_Cortex_....-Neuronal-",
                  "Non-neuronal-Non-dividing-",
                  "...Sample groups..6 Anatomical region groups., with 5.*",
                  "Brain_organoid-organoid_Paulsen_bioRxiv-",
                  "-Glut_E_(THEMIS)", "[(]THEMIS[)]",  # "[|].*",
                  "- method, tissue, subtissue, age, lineage.*"]
drop_name_patterns = ["striatum", "globus", "Entopeduncular",
                      "Substantia_nigra-", "Thalamus-"]
toppgene_rename_by_pattern = dict(
    Inhibitory=["Inh(_|ib)", "GABA"], Excitatory=["Excit", "Glut"],
    # Inhibitory=["Inh(_|ib)"], Excitatory=["Excit"],
    # # Gabaergic=["GABA"], Glutamatergic=["Glut"],
    Astrocyte=["Astrocyte","Astroglia", "Macroglial-Astro"],
    Microglial=["Microglia", "Micro"],
    Endothelial=["Endothelial"],
    Oligodendrocyte=[r"^(?=.*oligo)(?!.*poly)(?!.*opc).*"],
    OPC=["Polydendrocyte", "OPC"])
drop_regions = [
    "Mid-temporal_gyrus_(MTG)", "primary_auditory_cortex_(A1C)",
    "Somatosensory_Cortex_(S1)", "Anterior_Cingulate_gyrus_(CgG)",
    "Primary_Motor_Cortex_(M1)",
    "Mid-temporal_gyrus_(MTG)|Mid-temporal_gyrus_(MTG)",
    "primary_auditory_cortex_(A1C)|primary_auditory_cortex_(A1C)",
    "Somatosensory_Cortex_(S1)|Somatosensory_Cortex_(S1)",
    "Anterior_Cingulate_gyrus_(CgG)|Anterior_Cingulate_gyrus_(CgG)",
    "Primary_Motor_Cortex_(M1)|Primary_Motor_Cortex_(M1)",
    r"Neuronal|World / ",
    "Primary_Visual_cortex_(V1C)|Primary_Visual_cortex_(V1C)",
    "mon",
    "BMP_responsible_cell|6m", "bearing_cell|6m", "bearing_cell|GW16", "11",
    "Non-neuronal-Non-dividing",
    "Frontal_cortex|Frontal_cortex",
    "Primary_Visual_cortex_(V1C)", "Substantia_nigra",
    "Thalamus", "Hippocampus", "Frontal_cortex"
]  # remove if name is just a region or top-level hierarchical/undesired type
drop_regions = drop_regions + [f"{i}-Non-neuronal" for i in drop_regions]

# Query ToppGene
results_toppgene = scflow.pp.annotate_by_toppgene(
    markers_dict, remove_strings=remove_strings,
    species=species, min_genes=min_genes, source_patterns=source_patterns)

# Remove or Alter Certain Name Patterns
drop_names = results_toppgene.Name.apply(lambda x: not any((
    i.lower() in x.lower() for i in drop_name_patterns)))
results_toppgene = results_toppgene[drop_names]
rn_tg = results_toppgene.Name.apply(lambda x: {x: " | ".join([
    j for j in toppgene_rename_by_pattern if any((re.search(i.lower(
        ), x.lower()) is not None for i in toppgene_rename_by_pattern[
            j]))])}).apply(lambda x: np.nan if x[list(x.keys())[
                0]] == "" else x).dropna().reset_index(drop=True).apply(
                    lambda x: pd.Series(x)).stack().reset_index(
                        0, drop=True)  # renaming guide
results_toppgene = results_toppgene.replace({"Name": dict(rn_tg)})
results_toppgene = results_toppgene[~results_toppgene.Name.isin(drop_regions)]

# Map Labels (Plurality Vote If Sufficient or Top)
top_cs = dict(results_toppgene.groupby(
    "Gene Set").apply(lambda x: x.Name.iloc[:10].value_counts().index.values[
        0] if x.Name.iloc[:10].value_counts(
            normalize=True).iloc[0] >= 0.25 else x.Name[0]))
print("\n".join([f"{k}: {top_cs[k]}" for k in top_cs]), "\n\n")
if "annotation_toppgene" in self.rna.obs:
    self.rna.obs = self.rna.obs.drop("annotation_toppgene", axis=1)
self.rna.obs = self.rna.obs.join(self.rna.obs[col_celltype].replace(
    top_cs).to_frame("annotation_toppgene")).loc[self.rna.obs.index]

# Display Results
if "annotation_by_overlap" in self.rna.obs:
    print(round(self.rna.obs[["annotation_toppgene", "annotation_by_overlap"]
                             ].value_counts(normalize=True).sort_index(
                                 ) * 100, 2), "\n\n")
print(round(self.rna.obs["annotation_toppgene"].value_counts(
    normalize=True) * 100, 2))
results_toppgene.reset_index("ID", drop=True).drop([
    "QValueBonferroni", "QValueFDRBY", "QValueFDRBH",
    "TotalGenes", "Genes"], axis=1)
Server returned HTTP status code: 400
Content: {'error': 'No valid lookup found for symbol Gm20754'}
0: Inhibitory
1: Oligodendrocyte
2: Excitatory
3: Excitatory
4: Excitatory
5: Astrocyte
6: Excitatory
7: Inhibitory 


annotation_toppgene  annotation_by_overlap
Inhibitory           OPC                      45.02
                     Neuron                    1.87
Oligodendrocyte      Oligodendrocyte           9.53
Excitatory           OPC                       4.90
                     Neuron                   31.29
Astrocyte            Astrocyte                 7.38
Name: proportion, dtype: float64 


annotation_toppgene
Inhibitory         46.89
Excitatory         36.20
Oligodendrocyte     9.53
Astrocyte           7.38
Name: proportion, dtype: float64
Out[36]:
Name PValue GenesInTerm GenesInQuery GenesInTermInQuery Source Name_Original percent_atlas_genes_in_query Symbols
Gene Set Category
0 ToppCell Inhibitory 0.0 180 14 5 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.777778 [TENM1, ANKFN1, ADARB2, INPP4B, COL25A1]
ToppCell Inhibitory 0.0 184 14 5 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Inh_GABAe... 2.717391 [TENM1, DOCK11, DPP6, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 187 14 5 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Inh_GABAe... 2.673797 [TENM1, DOCK11, DPP6, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 190 14 5 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.631579 [ADARB2, TMEM255A, NXPH1, COL25A1, ST8SIA4]
ToppCell Inhibitory 0.0 194 14 5 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.57732 [DOCK11, ADARB2, NXPH1, COL25A1, ST8SIA4]
ToppCell Inhibitory 0.0 194 14 5 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.57732 [TENM1, DOCK11, ADARB2, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 196 14 5 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Inh_GABAerg... 2.55102 [TENM1, DOCK11, ADARB2, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 200 14 5 Human Adult Brain MTG Neuronal-Inhibitory|Neuronal / cells hierarchy... 2.5 [TENM1, DOCK11, ADARB2, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 200 14 5 Human Adult Brain MTG Neuronal-Inhibitory-iB|Neuronal / cells hierar... 2.5 [TENM1, DOCK11, NXPH1, COL25A1, ST8SIA4]
ToppCell Inhibitory 0.0 176 14 4 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Inh_GABAerg... 2.272727 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 176 14 4 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Inh_GABAerg... 2.272727 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 177 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.259887 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 177 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.259887 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 178 14 4 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Inh_GABAerg... 2.247191 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 179 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.234637 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 181 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.209945 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 182 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.197802 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 182 14 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.197802 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 183 14 4 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Inh_GABAerg... 2.185792 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 183 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.185792 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 183 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.185792 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 183 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.185792 [TMEM255A, NXPH1, COL25A1, ST8SIA4]
ToppCell Inhibitory 0.0 183 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.185792 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 184 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.173913 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 184 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Inh_GABAe... 2.173913 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 184 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.173913 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 185 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.162162 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 185 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.162162 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 186 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Inh_GABAe... 2.150538 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 186 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.150538 [ADARB2, INPP4B, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 186 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.150538 [ADARB2, TMEM255A, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 186 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.150538 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 187 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.139037 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 187 14 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.139037 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 188 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.12766 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 188 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.12766 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 188 14 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.12766 [ADARB2, INPP4B, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 188 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.12766 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 188 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.12766 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 189 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.116402 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 189 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.116402 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 189 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.116402 [DOCK11, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 189 14 4 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Inh_GABAerg... 2.116402 [ADARB2, INPP4B, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 190 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Inh_GABAe... 2.105263 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 190 14 4 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Inh_GABAerg... 2.105263 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 190 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.105263 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 190 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Inh_GABAe... 2.105263 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 190 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.105263 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 190 14 4 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Inh_GABAerg... 2.105263 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 190 14 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.105263 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 190 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.105263 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 190 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Inh_GABAe... 2.105263 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 190 14 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.105263 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 190 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.105263 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 191 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.094241 [DOCK11, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 191 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.094241 [ADARB2, INPP4B, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 191 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.094241 [ADARB2, INPP4B, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 192 14 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.083333 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 192 14 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.083333 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 192 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Inh_GABAe... 2.083333 [DOCK11, ADARB2, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 192 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.083333 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 192 14 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.083333 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 192 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.083333 [TENM1, DOCK11, ADARB2, NXPH1]
ToppCell Inhibitory 0.0 193 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.072539 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 193 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.072539 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 193 14 4 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Inh_GABAerg... 2.072539 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 193 14 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.072539 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 193 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.072539 [DOCK11, ADARB2, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 193 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.072539 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 193 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.072539 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 193 14 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.072539 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 193 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.072539 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 193 14 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.072539 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 193 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.072539 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 193 14 4 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Inh_GABAerg... 2.072539 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 193 14 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.072539 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 193 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.072539 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 194 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.061856 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 194 14 4 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Inh_GABAerg... 2.061856 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 194 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.061856 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 194 14 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.061856 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 194 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.061856 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 194 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.061856 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 194 14 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.061856 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 200 14 4 Human Adult Brain MTG Neuronal-Inhibitory-iB-iB_4(SST)-ADGRG6-|Neuro... 2.0 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 200 14 4 Human Adult Brain MTG Neuronal-Inhibitory-iB-iB_4(SST)-B3GAT2|Neuron... 2.0 [DOCK11, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 200 14 4 Human Adult Brain MTG Neuronal-Inhibitory-iA-iA_2(LAMP5)-CA1-|Neuron... 2.0 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 200 14 4 Human Adult Brain MTG Neuronal-Inhibitory-iB-iB_4(SST)|Neuronal / ce... 2.0 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 200 14 4 Human Adult Brain MTG Neuronal-Inhibitory-iB-iB_4(SST)-CALB1--L1-3|N... 2.0 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 200 14 4 Human Adult Brain MTG Neuronal-Inhibitory-iA-iA_2(LAMP5)-CA1|Neurona... 2.0 [ADARB2, TMEM255A, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 200 14 4 Human Adult Brain MTG Neuronal-Inhibitory-iB-iB_2(PVALB)-SULF1--L4-6... 2.0 [TENM1, DOCK11, NXPH1, ST8SIA4]
ToppCell Inhibitory 0.0 200 14 4 Human Adult Brain MTG Neuronal-Inhibitory-iB-iB_4(SST)-STK32A|Neuron... 2.0 [TENM1, DOCK11, NXPH1, COL25A1]
ToppCell Inhibitory 0.0 200 14 4 Human Adult Brain MTG Neuronal-Inhibitory-iB-iB_3(PVALB-SST)|Neurona... 2.0 [TENM1, DOCK11, NXPH1, ST8SIA4]
1 ToppCell Oligodendrocyte 0.0 188 14 7 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Non-neuronal-Mac... 3.723404 [ST18, PLP1, CLDN11, PDE4B, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 188 14 7 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... 3.723404 [ST18, PLP1, CLDN11, PDE4B, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 200 14 7 Human Adult Brain MTG Macroglial-Oligodendrocytes|Macroglial / cells... 3.5 [NKAIN2, ST18, PLP1, PCDH9, CLDN11, PLCL1, MBP]
ToppCell Oligodendrocyte 0.0 200 14 7 Human Adult Brain MTG Macroglial-Oligodendrocytes-OPALIN---|Macrogli... 3.5 [NKAIN2, ST18, PLP1, PCDH9, CLDN11, PLCL1, MBP]
ToppCell Oligodendrocyte 0.0 200 14 7 Human Adult Brain MTG Macroglial-Oligodendrocytes-OPALIN-|Macroglial... 3.5 [NKAIN2, ST18, PLP1, PCDH9, CLDN11, PLCL1, MBP]
ToppCell Oligodendrocyte 0.0 200 14 7 Human Adult Brain MTG Macroglial-Oligodendrocytes-OPALIN----L1-6|Mac... 3.5 [NKAIN2, ST18, PLP1, PCDH9, CLDN11, PLCL1, MBP]
ToppCell Oligodendrocyte 0.0 200 14 7 Human Adult Brain MTG Macroglial-Oligodendrocytes-OPALIN--|Macroglia... 3.5 [NKAIN2, ST18, PLP1, PCDH9, CLDN11, PLCL1, MBP]
ToppCell Oligodendrocyte 0.0 200 14 7 Human Adult Brain MTG Macroglial-Oligodendrocytes-OPALIN|Macroglial ... 3.5 [NKAIN2, ST18, PLP1, PCDH9, CLDN11, PLCL1, MBP]
ToppCell Oligodendrocyte 0.0 183 14 6 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... 3.278689 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 183 14 6 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... 3.278689 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 184 14 6 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... 3.26087 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 184 14 6 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... 3.26087 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 184 14 6 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... 3.26087 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 185 14 6 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... 3.243243 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 185 14 6 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... 3.243243 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 186 14 6 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... 3.225806 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 186 14 6 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... 3.225806 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... 0.0 187 14 6 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... 3.208556 [ST18, PLP1, CLDN11, ERBB4, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 187 14 6 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Non-neuronal-Mac... 3.208556 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 187 14 6 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Non-neuronal-Mac... 3.208556 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 188 14 6 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... 3.191489 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 188 14 6 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... 3.191489 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 189 14 6 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... 3.174603 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell OPC 0.0 189 14 6 Comparison of human cortex and organoids Non-neuronal-Non-dividing-OPC|World / Primary ... 3.174603 [PLP1, PCDH9, CLDN11, PDE4B, MBP, APOD]
ToppCell OPC 0.0 189 14 6 Comparison of human cortex and organoids Non-neuronal-Non-dividing-OPC-OPC|World / Prim... 3.174603 [PLP1, PCDH9, CLDN11, PDE4B, MBP, APOD]
ToppCell Oligodendrocyte 0.0 189 14 6 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... 3.174603 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell OPC 0.0 189 14 6 Comparison of human cortex and organoids Non-neuronal-Non-dividing-OPC-OPC-30|World / P... 3.174603 [PLP1, PCDH9, CLDN11, PDE4B, MBP, APOD]
ToppCell Oligodendrocyte 0.0 190 14 6 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... 3.157895 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... 0.0 190 14 6 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... 3.157895 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 191 14 6 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... 3.141361 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Primary_Motor_Cortex_(M1)-Non-neuronal|Primary... 0.0 191 14 6 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Non-neuronal|Primary... 3.141361 [ST18, PLP1, CLDN11, MBP, ASPA, APOD]
ToppCell Oligodendrocyte 0.0 122 14 5 Mouse Adult Brain Overview (690k cells, 9 regi... Frontal_cortex-Macroglia-OLIGODENDROCYTE-O2|Fr... 4.098361 [ST18, PLP1, CLDN11, MBP, ASPA]
ToppCell Oligodendrocyte 0.0 122 14 5 Mouse Adult Brain Overview (690k cells, 9 regi... Frontal_cortex-Macroglia-OLIGODENDROCYTE-O2-Tn... 4.098361 [ST18, PLP1, CLDN11, MBP, ASPA]
ToppCell Oligodendrocyte 0.0 122 14 5 Mouse Adult Brain Overview (690k cells, 9 regi... Frontal_cortex-Macroglia-OLIGODENDROCYTE|Front... 4.098361 [ST18, PLP1, CLDN11, MBP, ASPA]
ToppCell Oligodendrocyte 0.0 124 14 5 Mouse Adult Brain Overview (690k cells, 9 regi... Posterior_cortex-Macroglia-OLIGODENDROCYTE|Pos... 4.032258 [ST18, PLP1, CLDN11, MBP, ASPA]
ToppCell Oligodendrocyte 0.0 132 14 5 Mouse Adult Brain Overview (690k cells, 9 regi... Hippocampus-Macroglia-OLIGODENDROCYTE-O2-Trf|H... 3.787879 [PLP1, CDC37L1, CLDN11, MBP, ASPA]
ToppCell Oligodendrocyte 0.0 132 14 5 Mouse Adult Brain Overview (690k cells, 9 regi... Hippocampus-Macroglia-OLIGODENDROCYTE-O2|Hippo... 3.787879 [PLP1, CDC37L1, CLDN11, MBP, ASPA]
ToppCell Oligodendrocyte 0.0 137 14 5 Mouse Adult Brain Overview (690k cells, 9 regi... Hippocampus-Macroglia-OLIGODENDROCYTE|Hippocam... 3.649635 [PLP1, CDC37L1, CLDN11, MBP, ASPA]
ToppCell Oligodendrocyte 0.0 181 14 5 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... 2.762431 [ST18, PLP1, CLDN11, MBP, ASPA]
ToppCell Primary_Visual_cortex_(V1C)-Non-neuronal|Prima... 0.0 185 14 5 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Non-neuronal|Prima... 2.702703 [ST18, PLP1, CLDN11, ASPA, APOD]
ToppCell Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... 0.0 188 14 5 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... 2.659574 [ST18, PLP1, CLDN11, ASPA, APOD]
2 ToppCell Excitatory 0.0 195 14 7 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Glutamate... 3.589744 [NAV3, CELF2, TENM2, GRIN2A, MEG3, CSMD1, NRG3]
ToppCell Excitatory 0.0 197 14 5 Comparison of human cortex and organoids Neuron-Postmitotic-Excitatory_Neuron_-Deep_Lay... 2.538071 [GRM5, TENM2, PDE1A, SHISA6, MEG3]
ToppCell Excitatory 0.0 187 14 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Glutama... 2.139037 [CELF2, TENM2, PDE1A, TAFA1]
ToppCell Excitatory 0.0 189 14 4 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Glutamaterg... 2.116402 [CELF2, TENM2, PDE1A, TAFA1]
ToppCell Excitatory 0.0 190 14 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Glutamate... 2.105263 [TENM2, GRIN2A, CTTNBP2, TAFA1]
ToppCell Inhibitory 0.0 194 14 4 Comparison of human cortex and organoids Neuron-Postmitotic-Inhibitory_Neuron-MGE2|Worl... 2.061856 [GRM5, SHISA6, GRIN2A, MEG3]
ToppCell Inhibitory 0.0 194 14 4 Comparison of human cortex and organoids Neuron-Postmitotic-Inhibitory_Neuron-MGE2-10|W... 2.061856 [GRM5, SHISA6, GRIN2A, MEG3]
ToppCell OPC 0.0 200 14 4 Human Adult Brain MTG Macroglial-Polydendrocytes-PDGFRA----L1-6|Macr... 2.0 [GRM5, GRIA3, MEG3, CSMD1]
ToppCell OPC 0.0 200 14 4 Human Adult Brain MTG Macroglial-Polydendrocytes-PDGFRA|Macroglial /... 2.0 [GRM5, GRIA3, MEG3, CSMD1]
ToppCell OPC 0.0 200 14 4 Human Adult Brain MTG Macroglial-Polydendrocytes-PDGFRA---|Macroglia... 2.0 [GRM5, GRIA3, MEG3, CSMD1]
ToppCell OPC 0.0 200 14 4 Human Adult Brain MTG Macroglial-Polydendrocytes-PDGFRA-|Macroglial ... 2.0 [GRM5, GRIA3, MEG3, CSMD1]
ToppCell OPC 0.0 200 14 4 Human Adult Brain MTG Macroglial-Polydendrocytes-PDGFRA--|Macroglial... 2.0 [GRM5, GRIA3, MEG3, CSMD1]
ToppCell Excitatory 0.0 200 14 4 Human Adult Brain MTG Neuronal-Excitatory|Neuronal / cells hierarchy... 2.0 [NAV3, CELF2, PDE1A, TAFA1]
ToppCell OPC 0.0 200 14 4 Human Adult Brain MTG Macroglial-Polydendrocytes|Macroglial / cells ... 2.0 [GRM5, GRIA3, MEG3, CSMD1]
ToppCell Excitatory 0.000011 143 14 3 Mouse Adult Brain Overview (690k cells, 9 regi... Posterior_cortex-Neuronal-Excitatory-eN1(Slc17... 2.097902 [EPHA6, GRIA3, TAFA1]
ToppCell Inhibitory 0.000018 167 14 3 Mouse Adult Brain Overview (690k cells, 9 regi... Cerebellum-Neuronal-Inhibitory-iN1(Gad1Gad2_Th... 1.796407 [GRIA3, SHISA6, MEG3]
ToppCell Inhibitory 0.000018 167 14 3 Mouse Adult Brain Overview (690k cells, 9 regi... Cerebellum-Neuronal-Inhibitory|Cerebellum / Br... 1.796407 [GRIA3, SHISA6, MEG3]
ToppCell Excitatory 0.000018 168 14 3 Mouse Adult Brain Overview (690k cells, 9 regi... Hippocampus-Neuronal-Excitatory-eN1(Slc17a7)|H... 1.785714 [PDE1A, GRIN2A, TAFA1]
ToppCell Excitatory 0.000023 181 14 3 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Glutamate... 1.657459 [EPHA6, TENM2, TAFA1]
ToppCell Excitatory 0.000024 186 14 3 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Glutam... 1.612903 [CELF2, PDE1A, TAFA1]
3 ToppCell Excitatory 0.0 195 15 11 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Glutamate... 5.641026 [NAV3, CELF2, KALRN, NRG1, MEF2C, MEG3, MLIP, ...
ToppCell Excitatory 0.0 192 15 8 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Glutamaterg... 4.166667 [CELF2, KALRN, NRG1, LINGO2, MLIP, DPP10, PTPR...
ToppCell Excitatory 0.0 189 15 7 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... 3.703704 [CELF2, KALRN, LINGO2, MLIP, DPP10, PTPRD, KCNQ5]
ToppCell Excitatory 0.0 190 15 7 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Glutamaterg... 3.684211 [CELF2, KALRN, NRG1, LINGO2, MLIP, PTPRD, KCNQ5]
ToppCell Excitatory 0.0 190 15 7 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Glutama... 3.684211 [CELF2, KALRN, NRG1, LINGO2, MLIP, PTPRD, KCNQ5]
ToppCell Excitatory 0.0 200 15 7 Human Adult Brain MTG Neuronal-Excitatory|Neuronal / cells hierarchy... 3.5 [NAV3, CELF2, KALRN, MLIP, DPP10, PTPRD, KCNQ5]
ToppCell Posterior_cortex|World 0.0 159 15 6 Mouse Adult Brain Overview (690k cells, 9 regi... Posterior_cortex|World / BrainAtlas - Mouse Mc... 3.773585 [KALRN, MEF2C, KCNH7, KCTD16, MLIP, KCNQ5]
ToppCell Excitatory 0.0 187 15 6 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Glutama... 3.208556 [CELF2, KALRN, LINGO2, MLIP, PTPRD, KCNQ5]
ToppCell Excitatory 0.0 188 15 6 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... 3.191489 [CELF2, KALRN, LINGO2, MLIP, PTPRD, KCNQ5]
ToppCell Excitatory 0.0 189 15 6 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Glutamaterg... 3.174603 [CELF2, KALRN, LINGO2, MLIP, PTPRD, KCNQ5]
ToppCell Excitatory 0.0 186 15 5 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Glutam... 2.688172 [CELF2, KALRN, NRG1, MLIP, PTPRD]
ToppCell Excitatory 0.0 192 15 5 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Glutamate... 2.604167 [NRG1, MEF2C, LINGO2, KCNH7, PTPRD]
ToppCell Excitatory 0.0 174 15 4 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Glutamaterg... 2.298851 [NRG1, LINGO2, MLIP, PTPRD]
ToppCell Excitatory 0.0 190 15 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Glutamate... 2.105263 [MEF2C, LINGO2, KCTD16, PTPRD]
ToppCell Excitatory 0.0 195 15 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Glutamate... 2.051282 [KALRN, MEF2C, LINGO2, PTPRD]
ToppCell Neuron 0.0 199 15 4 Comparison of human cortex and organoids Neuron|World / Primary Cells by Cluster 2.01005 [CELF2, MEF2C, MEG3, PTPRD]
ToppCell Neuron-Postmitotic 0.0 199 15 4 Comparison of human cortex and organoids Neuron-Postmitotic|World / Primary Cells by Cl... 2.01005 [CELF2, MEF2C, MEG3, PTPRD]
ToppCell Excitatory 0.0 200 15 4 Comparison of human cortex and organoids Neuron-Postmitotic-Excitatory_Neuron_-Upper_La... 2.0 [CELF2, MEF2C, MEG3, PTPRD]
ToppCell Excitatory 0.0 200 15 4 Comparison of human cortex and organoids Neuron-Postmitotic-Excitatory_Neuron_-Upper_La... 2.0 [CELF2, MEF2C, MEG3, PTPRD]
ToppCell Cortical_neuron|GW09 0.0 200 15 4 Integration of Four Brain Organoid Datasets an... Fetal_brain-organoid_Tanaka_cellReport-GW09-Ne... 2.0 [CELF2, MEF2C, KCNH7, PTPRD]
ToppCell Frontal_cortex|World 0.000021 164 15 3 Mouse Adult Brain Overview (690k cells, 9 regi... Frontal_cortex|World / BrainAtlas - Mouse McCa... 1.829268 [MEF2C, KCNH7, KCNQ5]
ToppCell Excitatory 0.000023 170 15 3 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Glutamaterg... 1.764706 [NRG1, LINGO2, MLIP]
ToppCell Excitatory 0.000025 173 15 3 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... 1.734104 [NRG1, KCNH7, DPP10]
ToppCell Excitatory 0.000025 174 15 3 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... 1.724138 [NRG1, KCNH7, DPP10]
ToppCell Excitatory 0.000025 175 15 3 Mouse Adult Brain Overview (690k cells, 9 regi... Frontal_cortex-Neuronal-Excitatory-eN1(Slc17a7... 1.714286 [KALRN, MEF2C, KCNH7]
ToppCell Excitatory 0.000025 175 15 3 Mouse Adult Brain Overview (690k cells, 9 regi... Frontal_cortex-Neuronal-Excitatory-eN1(Slc17a7... 1.714286 [KALRN, MEF2C, KCNH7]
ToppCell Excitatory 0.000026 176 15 3 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Glutama... 1.704545 [LINGO2, KCTD16, MLIP]
4 ToppCell Excitatory 0.0 170 15 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... 2.352941 [SGCZ, RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.0 189 15 4 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... 2.116402 [CELF2, CACNA2D3, KCNQ5, PHACTR1]
ToppCell Excitatory 0.0 189 15 4 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Glutamaterg... 2.116402 [CELF2, CACNA2D3, KCNQ5, PHACTR1]
ToppCell Neuronal-ventral_progenitors_and_neurons_1|Org... 0.0 190 15 4 Integration of Four Brain Organoid Datasets an... Brain_organoid-organoid_Kanton_Nature-Organoid... 2.105263 [GRM5, ELMOD1, MEIS2, CACNA2D3]
ToppCell Excitatory 0.0 190 15 4 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Glutamaterg... 2.105263 [CELF2, CACNA2D3, KCNQ5, PHACTR1]
ToppCell Excitatory 0.0 190 15 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Glutama... 2.105263 [CELF2, CACNA2D3, KCNQ5, PHACTR1]
ToppCell Excitatory 0.0 192 15 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Glutamaterg... 2.083333 [CELF2, CACNA2D3, KCNQ5, PHACTR1]
ToppCell Excitatory 0.0 195 15 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Glutamate... 2.051282 [CELF2, CACNA2D3, KCNQ5, PHACTR1]
ToppCell Excitatory 0.00001 128 15 3 Mouse Adult Brain Overview (690k cells, 9 regi... Cerebellum-Neuronal-Excitatory-eN2(Slc17a7_Slc... 2.34375 [SGCZ, RGS9, KCNQ5]
ToppCell Excitatory 0.00001 128 15 3 Mouse Adult Brain Overview (690k cells, 9 regi... Cerebellum-Neuronal-Excitatory-eN2(Slc17a7_Slc... 2.34375 [SGCZ, RGS9, KCNQ5]
ToppCell Excitatory 0.000023 168 15 3 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Glutama... 1.785714 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000025 173 15 3 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Glutamaterg... 1.734104 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000025 174 15 3 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Glutam... 1.724138 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000025 174 15 3 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Glutamate... 1.724138 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000025 175 15 3 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... 1.714286 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000025 175 15 3 Mouse Adult Brain Overview (690k cells, 9 regi... Frontal_cortex-Neuronal-Excitatory-eN1(Slc17a7... 1.714286 [GRM5, DGKB, MEIS2]
ToppCell Excitatory 0.000025 175 15 3 Mouse Adult Brain Overview (690k cells, 9 regi... Frontal_cortex-Neuronal-Excitatory-eN1(Slc17a7... 1.714286 [GRM5, DGKB, MEIS2]
ToppCell Excitatory 0.000026 176 15 3 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Glutamaterg... 1.704545 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000026 176 15 3 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... 1.704545 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000026 177 15 3 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Glutam... 1.694915 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000027 178 15 3 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Glutamaterg... 1.685393 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000027 179 15 3 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Glutam... 1.675978 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000027 179 15 3 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Glutama... 1.675978 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000028 180 15 3 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... 1.666667 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000028 180 15 3 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Glutamaterg... 1.666667 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000028 181 15 3 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Glutamaterg... 1.657459 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000029 183 15 3 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Glutama... 1.639344 [RGS9, MEIS2, RYR3]
ToppCell Excitatory 0.000031 187 15 3 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Glutama... 1.604278 [CELF2, KCNQ5, PHACTR1]
ToppCell Excitatory 0.000031 188 15 3 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... 1.595745 [CELF2, KCNQ5, PHACTR1]
5 ToppCell Astrocyte 0.0 151 12 6 Mouse Adult Brain Overview (690k cells, 9 regi... Cerebellum-Macroglia-ASTROCYTE-Gja1|Cerebellum... 3.97351 [PLA2G7, NWD1, F3, RGS20, SLC1A2, SLC6A11]
ToppCell Astrocyte 0.0 151 12 6 Mouse Adult Brain Overview (690k cells, 9 regi... Cerebellum-Macroglia-ASTROCYTE|Cerebellum / Br... 3.97351 [PLA2G7, NWD1, F3, RGS20, SLC1A2, SLC6A11]
ToppCell Astrocyte 0.0 187 12 5 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... 2.673797 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 187 12 5 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... 2.673797 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 187 12 5 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... 2.673797 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 187 12 5 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... 2.673797 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 187 12 5 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... 2.673797 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 187 12 5 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... 2.673797 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 190 12 5 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... 2.631579 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 190 12 5 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Non-neuronal-Mac... 2.631579 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 190 12 5 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... 2.631579 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 190 12 5 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... 2.631579 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 192 12 5 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Non-neuronal-Mac... 2.604167 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 192 12 5 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... 2.604167 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 192 12 5 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Non-neuronal-Mac... 2.604167 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 193 12 5 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... 2.590674 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 193 12 5 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... 2.590674 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 200 12 5 Human Adult Brain MTG Macroglial-Astrocytes|Macroglial / cells hiera... 2.5 [NWD1, GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 121 12 4 Mouse Adult Brain Overview (690k cells, 9 regi... Hippocampus-Macroglia-ASTROCYTE-Gja1-Astrocyte... 3.305785 [PLA2G7, NWD1, GPC5, SLC1A2]
ToppCell Astrocyte 0.0 121 12 4 Mouse Adult Brain Overview (690k cells, 9 regi... Hippocampus-Macroglia-ASTROCYTE-Gja1-Astrocyte... 3.305785 [PLA2G7, NWD1, GPC5, SLC1A2]
ToppCell Astrocyte 0.0 121 12 4 Mouse Adult Brain Overview (690k cells, 9 regi... Hippocampus-Macroglia-ASTROCYTE-Gja1-Astrocyte... 3.305785 [PLA2G7, NWD1, GPC5, SLC1A2]
ToppCell Astrocyte 0.0 187 12 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Non-neuronal-Mac... 2.139037 [NWD1, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 196 12 4 Integration of Four Brain Organoid Datasets an... Fetal_brain-fetalBrain_Zhong_nature-GW23-Macro... 2.040816 [GPC5, PHKA1, F3, RGS20]
ToppCell Macroglial|GW23 0.0 196 12 4 Integration of Four Brain Organoid Datasets an... Fetal_brain-fetalBrain_Zhong_nature-GW23-Macro... 2.040816 [GPC5, PHKA1, F3, RGS20]
ToppCell Astrocyte 0.0 200 12 4 Human Adult Brain MTG Macroglial-Astrocytes-SLC14A1|Macroglial / cel... 2.0 [GPC5, F3, RGS20, SLC1A2]
ToppCell Macroglial|GW23 0.0 200 12 4 Integration of Four Brain Organoid Datasets an... Fetal_brain-organoid_Tanaka_cellReport-GW23-Ma... 2.0 [BCAN, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 200 12 4 Human Adult Brain MTG Macroglial-Astrocytes-GFAP---|Macroglial / cel... 2.0 [NWD1, GPC5, F3, RGS20]
ToppCell Astrocyte 0.0 200 12 4 Integration of Four Brain Organoid Datasets an... Fetal_brain-fetalBrain_Zhong_nature-GW26-Macro... 2.0 [BCAN, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 200 12 4 Human Adult Brain MTG Macroglial-Astrocytes-GFAP-|Macroglial / cells... 2.0 [NWD1, GPC5, F3, RGS20]
ToppCell Astrocyte 0.0 200 12 4 Human Adult Brain MTG Macroglial-Astrocytes-GFAP--|Macroglial / cell... 2.0 [NWD1, GPC5, F3, RGS20]
ToppCell Astrocyte 0.0 200 12 4 Human Adult Brain MTG Macroglial-Astrocytes-GFAP|Macroglial / cells ... 2.0 [NWD1, GPC5, F3, RGS20]
ToppCell Astrocyte 0.0 200 12 4 Human Adult Brain MTG Macroglial-Astrocytes-SLC14A1-|Macroglial / ce... 2.0 [GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 200 12 4 Human Adult Brain MTG Macroglial-Astrocytes-SLC14A1---|Macroglial / ... 2.0 [GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 200 12 4 Human Adult Brain MTG Macroglial-Astrocytes-GFAP----L1-2|Macroglial ... 2.0 [NWD1, GPC5, F3, RGS20]
ToppCell Astrocyte 0.0 200 12 4 Human Adult Brain MTG Macroglial-Astrocytes-SLC14A1----L1-6|Macrogli... 2.0 [GPC5, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.0 200 12 4 Human Adult Brain MTG Macroglial-Astrocytes-SLC14A1--|Macroglial / c... 2.0 [GPC5, F3, RGS20, SLC1A2]
ToppCell Macroglial|GW26 0.0 200 12 4 Integration of Four Brain Organoid Datasets an... Fetal_brain-fetalBrain_Zhong_nature-GW26-Macro... 2.0 [BCAN, F3, RGS20, SLC1A2]
ToppCell Astrocyte 0.000004 119 12 3 Mouse Adult Brain Overview (690k cells, 9 regi... Hippocampus-Macroglia-ASTROCYTE|Hippocampus / ... 2.521008 [PLA2G7, NWD1, SLC1A2]
ToppCell Astrocyte 0.000004 119 12 3 Mouse Adult Brain Overview (690k cells, 9 regi... Hippocampus-Macroglia-ASTROCYTE-Gja1|Hippocamp... 2.521008 [PLA2G7, NWD1, SLC1A2]
ToppCell Astrocyte 0.000012 172 12 3 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... 1.744186 [NWD1, F3, RGS20]
ToppCell Astrocyte 0.000013 178 12 3 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... 1.685393 [NWD1, F3, RGS20]
ToppCell Astrocyte 0.000014 181 12 3 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... 1.657459 [NWD1, F3, RGS20]
ToppCell Astrocyte 0.000015 186 12 3 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... 1.612903 [NWD1, F3, RGS20]
ToppCell Astrocyte 0.000015 186 12 3 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... 1.612903 [NWD1, F3, RGS20]
ToppCell Astrocyte 0.000015 187 12 3 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... 1.604278 [NWD1, F3, RGS20]
ToppCell Astrocyte 0.000016 190 12 3 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... 1.578947 [NWD1, F3, RGS20]
ToppCell Astrocyte 0.000016 190 12 3 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... 1.578947 [NWD1, F3, RGS20]
ToppCell Radial_Glia-oRG-26 0.000016 192 12 3 Comparison of human cortex and organoids Non-neuronal-Non-dividing-Radial_Glia-oRG-26|W... 1.5625 [BCAN, F3, RGS20]
ToppCell Brain_organoid-organoid_Velasco_nature-3_mon-R... 0.000018 198 12 3 Integration of Four Brain Organoid Datasets an... Brain_organoid-organoid_Velasco_nature-3_mon-R... 1.515152 [BCAN, F3, RGS20]
ToppCell Brain_organoid-organoid_Velasco_nature-3_mon-R... 0.000018 199 12 3 Integration of Four Brain Organoid Datasets an... Brain_organoid-organoid_Velasco_nature-3_mon-R... 1.507538 [BCAN, F3, RGS20]
ToppCell 3.5_mon-Radial_glial-oRG|3.5_mon 0.000018 199 12 3 Integration of Four Brain Organoid Datasets an... Brain_organoid-organoid_Paulsen_bioRxiv-3.5_mo... 1.507538 [BCAN, F3, RGS20]
ToppCell mon-Radial_glial-oRG|6_mon 0.000018 199 12 3 Integration of Four Brain Organoid Datasets an... Brain_organoid-organoid_Velasco_nature-6_mon-R... 1.507538 [BCAN, F3, SLC1A2]
ToppCell Astrocyte 0.000018 200 12 3 Integration of Four Brain Organoid Datasets an... Brain_organoid-organoid_Tanaka_cellReport-3m-M... 1.5 [BCAN, F3, RGS20]
ToppCell Astrocyte 0.000018 200 12 3 Integration of Four Brain Organoid Datasets an... Brain_organoid-organoid_Tanaka_cellReport-6m-M... 1.5 [BCAN, F3, SLC1A2]
ToppCell Macroglial|3m 0.000018 200 12 3 Integration of Four Brain Organoid Datasets an... Brain_organoid-organoid_Tanaka_cellReport-3m-M... 1.5 [BCAN, F3, RGS20]
6 ToppCell Excitatory 0.0 195 15 10 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Glutamate... 5.128205 [NAV3, SYT1, SLC8A1, RYR2, NALF1, PTPRD, CSMD1...
ToppCell Excitatory 0.0 190 15 6 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Glutama... 3.157895 [PRICKLE1, SLC8A1, RYR2, CDH18, PTPRD, HS3ST4]
ToppCell Excitatory 0.0 200 15 6 Human Adult Brain MTG Neuronal-Excitatory|Neuronal / cells hierarchy... 3.0 [NAV3, PRICKLE1, RYR2, CDH18, PTPRD, HS3ST4]
ToppCell Excitatory 0.0 189 15 5 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... 2.645503 [PRICKLE1, RYR2, CDH18, PTPRD, HS3ST4]
ToppCell Excitatory 0.0 190 15 5 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Glutamaterg... 2.631579 [PRICKLE1, SLC8A1, RYR2, CDH18, PTPRD]
ToppCell Excitatory 0.0 192 15 5 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Glutamaterg... 2.604167 [PRICKLE1, SLC8A1, RYR2, CDH18, PTPRD]
ToppCell Excitatory 0.0 150 15 4 Mouse Adult Brain Overview (690k cells, 9 regi... Frontal_cortex-Neuronal-Excitatory-eN2(Slc17a7... 2.666667 [FUT9, CDH18, GARNL3, HS3ST4]
ToppCell Excitatory 0.0 153 15 4 Mouse Adult Brain Overview (690k cells, 9 regi... Frontal_cortex-Neuronal-Excitatory-eN2(Slc17a7... 2.614379 [FUT9, CDH18, GARNL3, HS3ST4]
ToppCell Excitatory 0.0 186 15 4 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Glutam... 2.150538 [PRICKLE1, RYR2, CDH18, PTPRD]
ToppCell Excitatory 0.0 200 15 4 Comparison of human cortex and organoids Neuron-Postmitotic-Excitatory_Neuron_-Upper_La... 2.0 [SYT1, SLC8A1, RYR2, PTPRD]
ToppCell Excitatory 0.0 200 15 4 Comparison of human cortex and organoids Neuron-Postmitotic-Excitatory_Neuron_-Upper_La... 2.0 [SYT1, SLC8A1, RYR2, PTPRD]
ToppCell Excitatory 0.000021 163 15 3 Mouse Adult Brain Overview (690k cells, 9 regi... Posterior_cortex-Neuronal-Excitatory-eN2(Slc17... 1.840491 [FUT9, CDH18, HS3ST4]
ToppCell Excitatory 0.000023 170 15 3 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... 1.764706 [KIAA1217, GARNL3, HS3ST4]
ToppCell Excitatory 0.000025 174 15 3 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Glutam... 1.724138 [SLC8A1, KIAA1217, HS3ST4]
ToppCell Excitatory 0.000026 177 15 3 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Glutam... 1.694915 [KIAA1217, CDH18, HS3ST4]
ToppCell Excitatory 0.000026 177 15 3 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Glutamaterg... 1.694915 [SLC8A1, KIAA1217, HS3ST4]
ToppCell Excitatory 0.000027 179 15 3 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Glutam... 1.675978 [KIAA1217, CDH18, HS3ST4]
ToppCell Excitatory 0.00003 184 15 3 Mouse Adult Brain Overview (690k cells, 9 regi... Posterior_cortex-Neuronal-Excitatory|Posterior... 1.630435 [FUT9, RYR2, NRG3]
7 ToppCell Inhibitory 0.0 169 12 9 Mouse Adult Brain Overview (690k cells, 9 regi... Frontal_cortex-Neuronal-Inhibitory|Frontal_cor... 5.325444 [ZNF536, ERBB4, GRIP1, GALNTL6, SNTG1, SLC2A13...
ToppCell Inhibitory 0.0 159 12 7 Mouse Adult Brain Overview (690k cells, 9 regi... Frontal_cortex-Neuronal-Inhibitory-iN2(Gad1Gad... 4.402516 [ZNF536, ERBB4, GRIP1, ZMAT4, SLC2A13, ADARB2,...
ToppCell Inhibitory 0.0 167 12 7 Mouse Adult Brain Overview (690k cells, 9 regi... Posterior_cortex-Neuronal-Inhibitory|Posterior... 4.191617 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2, TCF4, ...
ToppCell Inhibitory 0.0 194 12 7 Comparison of human cortex and organoids Neuron-Postmitotic-Inhibitory_Neuron-MGE2|Worl... 3.608247 [ZNF536, ERBB4, GALNTL6, MEG3, ADARB2, TCF4, C...
ToppCell Inhibitory 0.0 194 12 7 Comparison of human cortex and organoids Neuron-Postmitotic-Inhibitory_Neuron-MGE2-10|W... 3.608247 [ZNF536, ERBB4, GALNTL6, MEG3, ADARB2, TCF4, C...
ToppCell Inhibitory 0.0 196 12 7 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Inh_GABAerg... 3.571429 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2, TCF4, ...
ToppCell Inhibitory 0.0 191 12 6 Comparison of human cortex and organoids Neuron-Postmitotic-Inhibitory_Neuron|World / P... 3.141361 [ZNF536, ERBB4, GALNTL6, MEG3, ADARB2, TCF4]
ToppCell Inhibitory 0.0 191 12 6 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 3.141361 [ZNF536, ERBB4, GRIP1, ZMAT4, GALNTL6, ADARB2]
ToppCell Inhibitory 0.0 192 12 6 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 3.125 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2, TCF4]
ToppCell Inhibitory 0.0 192 12 6 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Inh_GABAe... 3.125 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2, CNTNAP2]
ToppCell Inhibitory 0.0 192 12 6 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 3.125 [ZNF536, ERBB4, GRIP1, ADARB2, TCF4, CNTNAP2]
ToppCell Inhibitory 0.0 193 12 6 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 3.108808 [ZNF536, ERBB4, GRIP1, ADARB2, TCF4, CNTNAP2]
ToppCell Inhibitory 0.0 194 12 6 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 3.092784 [ZNF536, ERBB4, GRIP1, ADARB2, TCF4, CNTNAP2]
ToppCell Inhibitory 0.0 194 12 6 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 3.092784 [ZNF536, ERBB4, GRIP1, ADARB2, TCF4, CNTNAP2]
ToppCell Inhibitory 0.0 200 12 6 Human Adult Brain MTG Neuronal-Inhibitory|Neuronal / cells hierarchy... 3.0 [ZNF536, ERBB4, GRIP1, ADARB2, TCF4, CNTNAP2]
ToppCell Inhibitory 0.0 200 12 6 Integration of Four Brain Organoid Datasets an... Fetal_brain-fetalBrain_Zhong_nature-GW26-Neuro... 3.0 [ZNF536, ERBB4, GRIP1, MEG3, TCF4, CNTNAP2]
ToppCell Inhibitory 0.0 200 12 6 Human Adult Brain MTG Neuronal-Inhibitory-iA-iA_1(SST_PAX6)|Neuronal... 3.0 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2, CNTNAP2]
ToppCell Inhibitory 0.0 200 12 6 Human Adult Brain MTG Neuronal-Inhibitory-iA-iA_3(VIP)-VIP_3|Neurona... 3.0 [ZNF536, ERBB4, GALNTL6, ADARB2, TCF4, CNTNAP2]
ToppCell Inhibitory 0.0 200 12 6 Human Adult Brain MTG Neuronal-Inhibitory-iA-iA_3(VIP)|Neuronal / ce... 3.0 [ZNF536, ERBB4, GALNTL6, ADARB2, TCF4, CNTNAP2]
ToppCell Inhibitory 0.0 155 12 5 Mouse Adult Brain Overview (690k cells, 9 regi... Posterior_cortex-Neuronal-Inhibitory-iN2(Gad1G... 3.225806 [ZNF536, ERBB4, GRIP1, ADARB2, TCF4]
ToppCell Inhibitory 0.0 169 12 5 Mouse Adult Brain Overview (690k cells, 9 regi... Hippocampus-Neuronal-Inhibitory-iN2(Gad1Gad2)|... 2.95858 [ZNF536, ERBB4, GRIP1, SLC2A13, ADARB2]
ToppCell Inhibitory 0.0 181 12 5 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.762431 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2]
ToppCell Inhibitory 0.0 182 12 5 Mouse Adult Brain Overview (690k cells, 9 regi... Hippocampus-Neuronal-Inhibitory|Hippocampus / ... 2.747253 [ZNF536, ERBB4, GRIP1, SLC2A13, ADARB2]
ToppCell Inhibitory 0.0 184 12 5 Comparison of human cortex and organoids Neuron-Postmitotic-Inhibitory_Neuron-SST-MGE1-... 2.717391 [ZNF536, ERBB4, MEG3, ADARB2, TCF4]
ToppCell Inhibitory 0.0 185 12 5 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.702703 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2]
ToppCell Inhibitory 0.0 185 12 5 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.702703 [ZNF536, ERBB4, GALNTL6, ADARB2, TCF4]
ToppCell Inhibitory 0.0 186 12 5 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.688172 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2]
ToppCell Inhibitory 0.0 186 12 5 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.688172 [ZNF536, ERBB4, GRIP1, ZMAT4, GALNTL6]
ToppCell Inhibitory 0.0 187 12 5 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.673797 [ZNF536, ERBB4, GALNTL6, ADARB2, TCF4]
ToppCell Inhibitory 0.0 187 12 5 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.673797 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2]
ToppCell Inhibitory 0.0 188 12 5 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.659574 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2]
ToppCell Inhibitory 0.0 188 12 5 Human Adult Multiple Cortical Areas SMART-seq Primary_Visual_cortex_(V1C)-Neuronal-Inh_GABAe... 2.659574 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2]
ToppCell Inhibitory 0.0 189 12 5 Human Adult Multiple Cortical Areas SMART-seq Somatosensory_Cortex_(S1)-Neuronal-Inh_GABAerg... 2.645503 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2]
ToppCell Inhibitory 0.0 191 12 5 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.617801 [ZNF536, ERBB4, GALNTL6, ADARB2, TCF4]
ToppCell Inhibitory 0.0 191 12 5 Human Adult Multiple Cortical Areas SMART-seq Mid-temporal_gyrus_(MTG)-Neuronal-Inh_GABAergi... 2.617801 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2]
ToppCell Inhibitory 0.0 192 12 5 Comparison of human cortex and organoids Neuron-Postmitotic-Inhibitory_Neuron-SST-MGE1|... 2.604167 [ZNF536, ERBB4, MEG3, ADARB2, TCF4]
ToppCell Inhibitory 0.0 194 12 5 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.57732 [ZNF536, ERBB4, GRIP1, ZMAT4, ADARB2]
ToppCell Inhibitory 0.0 194 12 5 Human Adult Multiple Cortical Areas SMART-seq Anterior_Cingulate_gyrus_(CgG)-Neuronal-Inh_GA... 2.57732 [ZNF536, ERBB4, GRIP1, ZMAT4, ADARB2]
ToppCell Fetal_brain-fetalBrain_Zhong_nature-GW23|fetal... 0.0 199 12 5 Integration of Four Brain Organoid Datasets an... Fetal_brain-fetalBrain_Zhong_nature-GW23|fetal... 2.512563 [ZNF536, ERBB4, MEG3, ADARB2, TCF4]
ToppCell Inhibitory 0.0 200 12 5 Human Adult Brain MTG Neuronal-Inhibitory-iA-iA_3(VIP)-VIP_3-CBLN1-L... 2.5 [ZNF536, ERBB4, GALNTL6, ADARB2, CNTNAP2]
ToppCell Inhibitory 0.0 200 12 5 Human Adult Brain MTG Neuronal-Inhibitory-iA-iA_1(SST_PAX6)-NMBR-|Ne... 2.5 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2]
ToppCell Inhibitory 0.0 200 12 5 Human Adult Brain MTG Neuronal-Inhibitory-iA-iA_1(SST_PAX6)-NMBR|Neu... 2.5 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2]
ToppCell Inhibitory 0.0 200 12 5 Human Adult Brain MTG Neuronal-Inhibitory-iA-iA_3(VIP)-VIP_3-CHRM2|N... 2.5 [ZNF536, ERBB4, GALNTL6, ADARB2, CNTNAP2]
ToppCell Inhibitory 0.0 200 12 5 Human Adult Brain MTG Neuronal-Inhibitory-iA-iA_3(VIP)-VIP_3-CHRM2-L... 2.5 [ZNF536, ERBB4, GALNTL6, ADARB2, CNTNAP2]
ToppCell Inhibitory 0.0 200 12 5 Integration of Four Brain Organoid Datasets an... Fetal_brain-fetalBrain_Zhong_nature-GW23-Neuro... 2.5 [ZNF536, ERBB4, MEG3, ADARB2, TCF4]
ToppCell Inhibitory 0.0 200 12 5 Human Adult Brain MTG Neuronal-Inhibitory-iA|Neuronal / cells hierar... 2.5 [ZNF536, ERBB4, GRIP1, ADARB2, TCF4]
ToppCell Inhibitory 0.0 200 12 5 Human Adult Brain MTG Neuronal-Inhibitory-iA-iA_1(SST_PAX6)-NMBR--L1... 2.5 [ZNF536, ERBB4, GRIP1, GALNTL6, ADARB2]
ToppCell Inhibitory 0.0 200 12 5 Human Adult Brain MTG Neuronal-Inhibitory-iA-iA_3(VIP)-VIP_3-CBLN1|N... 2.5 [ZNF536, ERBB4, GALNTL6, ADARB2, CNTNAP2]
ToppCell Inhibitory 0.0 139 12 4 Mouse Adult Brain Overview (690k cells, 9 regi... Frontal_cortex-Neuronal-Inhibitory-iN2(Gad1Gad... 2.877698 [ERBB4, SLC2A13, ADARB2, TCF4]
ToppCell Inhibitory 0.0 161 12 4 Mouse Adult Brain Overview (690k cells, 9 regi... Hippocampus-Neuronal-Inhibitory-iN1(Gad1Gad2_T... 2.484472 [ZNF536, ERBB4, GRIP1, SLC2A13]
ToppCell Inhibitory 0.0 164 12 4 Integration of Four Brain Organoid Datasets an... Fetal_brain-fetalBrain_Zhong_nature-GW16-Neuro... 2.439024 [ZNF536, ERBB4, MEG3, ADARB2]
ToppCell Inhibitory 0.0 169 12 4 Human Adult Multiple Cortical Areas SMART-seq Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... 2.366864 [ZNF536, ERBB4, GALNTL6, ADARB2]
ToppCell Inhibitory 0.0 171 12 4 Human Adult Multiple Cortical Areas SMART-seq primary_auditory_cortex_(A1C)-Neuronal-Inh_GAB... 2.339181 [ZNF536, ERBB4, GALNTL6, ADARB2]

Annotate with CellTypist¶

In [37]:
%%time

# To Aggregate More Specific Cell Types
celltypist_rename = dict(
    Gabaergic=["GABA"], Glutamatergic=["Glut"],
    Dopaminergic=["Dopa"], Serotonergic=["Sero"],
    Inhibitory=["Inh"], Excitatory=["Exc"],
    Astrocyte=["Astro"],
    Microglia=["Microglia"],
    Pericyte=["peri"],
    Monocyte=["Monocyte"],
    Lymphoid=["Lymphoid"],
    Endothelial=["Endothelial", "Endo"],
    Oligodendrocyte=[r"^(?=.*oligo)(?!.*poly)(?!.*opc).*"],
    OPC=["OPC", "Polydend"])

# Run CellTypist
self.rna.X = self.rna.layers["counts"].copy()
sc.pp.normalize_total(self.rna, target_sum=10000)
sc.pp.log1p(self.rna) # copy=True: do not update adata.X
predictions = self.annotate(
    model_celltypist, col_celltype=col_celltype, layer=None,
    col_celltype_new="", majority_voting=True, min_prop=0.5, use_GPU=True)
if "majority_voting" in self.rna.obs:
    self.rna.obs.loc[:, "majority_voting_short"] = self.rna.obs[
        "majority_voting"].apply(lambda x: " ".join(x.split(
            " ")[1:]) if all((i in [str(i) for i in np.arange(
                0, 10)] for i in x.split(" ")[
                    0])) else x)  # drop pointless #s in front of cell types

# Rename Cell Types
rn_ct = predictions.predicted_labels.groupby("majority_voting").apply(
    lambda x: {x.name: " | ".join([
        j for j in celltypist_rename if any((re.search(
            i.lower(), x.name.lower()) for i in celltypist_rename[
                j]))])}).apply(lambda x: {list(x.keys())[0]: list(x.keys(
                    ))[0]} if x[list(x.keys())[0] ] == "" else x).apply(
                        lambda x: pd.Series(x)).stack().reset_index(
                            0, drop=True)
if "annotation_majority_voting" in self.rna.obs:
    self.rna.obs = self.rna.obs.drop("annotation_majority_voting", axis=1)
self.rna.obs = self.rna.obs.join(self.rna.obs.replace({
    "majority_voting": dict(rn_ct)})["majority_voting"].to_frame(
        "annotation_majority_voting"))
self.rna.X = self.rna.layers["scaled"].copy()
WARNING: adata.X seems to be already log-transformed.
2025-09-01 16:57:34 | [INFO] 🔬 Input data has 62326 cells and 21692 genes
2025-09-01 16:57:34 | [INFO] 🔗 Matching reference genes in the model
2025-09-01 16:57:35 | [INFO] 🧬 4758 features used for prediction
2025-09-01 16:57:35 | [INFO] ⚖️ Scaling input data
2025-09-01 16:57:47 | [INFO] 🖋️ Predicting labels
2025-09-01 16:57:47 | [INFO] ✅ Prediction done!
2025-09-01 16:57:47 | [INFO] 👀 Detected a neighborhood graph in the input object, will run over-clustering on the basis of it
2025-09-01 16:57:47 | [INFO] ⛓️ Over-clustering input data with resolution set to 20
2025-09-01 16:57:49 | [INFO] 🗳️ Majority voting the predictions
2025-09-01 16:57:50 | [INFO] ✅ Majority voting done!
No description has been provided for this image
<timed exec>:30: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
CPU times: user 19.8 s, sys: 31.6 s, total: 51.5 s
Wall time: 41.1 s

Annotate with Map My Cells¶

  • Make sure to run the following bash commonds after activating the conda environment used for this notebook.

  • Pull cell_type_mapper from GitHub (clone into your home directory): cd && git clone git@github.com:AllenInstitute/cell_type_mapper.git

  • Navigate to that directory and run pip install .

  • Navigate to the folder containing this notebook.

  • Install ABC Atlas (while in same directory as this notebook): pip install -U git+https://github.com/alleninstitute/abc_atlas_access >& scratch/junk.txt

  • Pull lookup files (while in same directory as this notebook):

cd resources
wget https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/mapmycells/WMB-10X/20240831/mouse_markers_230821.json
wget https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/mapmycells/WMB-10X/20240831/precomputed_stats_ABC_revision_230821.h5

Note: To use GPU + Torch, you may need to alter the file "cell_type_mapper/src/cell_type_mapper/cell_by_gene/cell_by_gene.py" line np.where(np.logical_not(np.isfinite(data)))[0] to read instead

try:
    nan_rows = np.where(
        np.logical_not(np.isfinite(data.cpu().numpy())))[0]
except Exception:
    nan_rows = np.where(np.logical_not(np.isfinite(data)))[0]

You may have to run the following code in this notebook:

os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"

and

`_correlation_dot_gpu()` in distance_utils.py change
`correlation = torch.matmul(arr0, arr1)` to
try:
    correlation = torch.matmul(arr0, arr1)
except RuntimeError as err:
    if "CUBLAS_STATUS_NOT_INITIALIZED" in str(err):
        arr0_cpu = arr0.cpu()
        arr1_cpu = arr1.cpu()
        correlation = torch.matmul(arr0_cpu, arr1_cpu).to(arr0.device)
    else:
        raise

to manage processes/memory.

In [ ]:
# %%time

# # Write File to Use as Input for Map My Cells
# if overwrite is True or not os.path.exists(file_new):
#     os.makedirs("data", exist_ok=True)
#     self.rna.X = self.rna.layers["counts"]
#     self.rna.write_h5ad(file_new)
# else:
#     raise ValueError("Must be able to overwrite to run Map My Cells.")

# # Run Map My Cells
# self.rna = scflow.pp.run_mapbraincells(
#     file_new, map_my_cells_source=map_my_cells_source,
#     dir_scratch="scratch", dir_resources="resources",
#     validate_output_file="scratch/tmp.h5ad",  # map_to_ensembl=True,
#     map_my_cells_region_keys=map_my_cells_region_keys,
#     map_my_cells_cell_keys=map_my_cells_cell_keys, verbose_stdout=True,
#     n_processors=4, chunk_size=5000, max_gb=5)

# View Results
# _ = self.plot(kind="umap", color=["cellmap_class_name"])
# if "annotation_toppgene" in self.rna.obs:
#     print(self.rna.obs[["cellmap_class_name", "annotation_toppgene"]
#                        ].value_counts().sort_index())
# self.rna.obs[[i for i in self.rna.obs if "cellmap" in i and "ori" not in i]]

Compare Annotations¶

In [38]:
cols = ["annotation_by_overlap", "annotation_scanvi",
        "annotation_toppgene", "annotation_majority_voting"]
# cols += [i for i in [
#     "cellmap_class_name", "cellmap_subclass_name"] if i in self.rna.obs]
cols = [i for i in cols if i in self.rna.obs]

# Plot UMAPs
self.plot(kind="umap", color=cols, wspace=0.3)

# Compare
# self.rna.obs[cols].value_counts().reset_index().groupby(cols[0]).apply(
#         lambda x: x.sort_values("count", ascending=False).reset_index(
#                 drop=True), include_groups=False).reset_index(
#                         -1, drop=True).set_index(cols[1:], append=True)
self.rna.obs[cols].groupby(cols[0]).apply(
        lambda x: round(100 * x.value_counts(normalize=True), 1).sort_index(
                ).sort_values(ascending=False), include_groups=False)
Out[38]:
annotation_by_overlap  annotation_scanvi  annotation_toppgene  annotation_majority_voting
OPC                    Inhibitory         Inhibitory           Heterogeneous                 17.5
                       Excitatory         Inhibitory           Glutamatergic                 16.3
                       Inhibitory         Inhibitory           Gabaergic                     15.3
                       Excitatory         Excitatory           Glutamatergic                  8.6
                                          Inhibitory           Heterogeneous                  6.8
                       Oligodendrocyte    Inhibitory           Heterogeneous                  5.9
                       OPC                Inhibitory           OPC                            5.1
                       Microglial         Inhibitory           Glutamatergic                  4.3
                       Inhibitory         Inhibitory           Glutamatergic                  3.4
                       Oligodendrocyte    Inhibitory           Glutamatergic                  2.3
                       OPC                Inhibitory           Heterogeneous                  1.9
                       Astrocyte          Inhibitory           Heterogeneous                  1.9
                       Microglial         Inhibitory           Microglia                      1.6
                       Oligodendrocyte    Inhibitory           Oligodendrocyte                1.2
                       Excitatory         Inhibitory           Gabaergic                      1.1
                                          Excitatory           Heterogeneous                  0.9
                       Pericyte           Inhibitory           Glutamatergic                  0.8
                       OPC                Inhibitory           Gabaergic                      0.7
                       Microglial         Inhibitory           Heterogeneous                  0.4
                       OPC                Inhibitory           Oligodendrocyte                0.4
                       Oligodendrocyte    Inhibitory           Gabaergic                      0.3
                       Pericyte           Inhibitory           330 VLMC NN                    0.3
                       Oligodendrocyte    Inhibitory           330 VLMC NN                    0.3
                       Astrocyte          Inhibitory           Glutamatergic                  0.2
                       Oligodendrocyte    Excitatory           Glutamatergic                  0.2
                       Inhibitory         Inhibitory           Dopaminergic                   0.2
                       Excitatory         Inhibitory           330 VLMC NN                    0.2
                       Microglial         Inhibitory           Gabaergic                      0.2
                       Endothelial        Inhibitory           330 VLMC NN                    0.2
                                                               Endothelial                    0.2
                       Microglial         Inhibitory           Endothelial                    0.1
                                                               330 VLMC NN                    0.1
                       Endothelial        Inhibitory           Pericyte                       0.1
                       Astrocyte          Inhibitory           Gabaergic                      0.1
                       Pericyte           Inhibitory           329 ABC NN                     0.1
                       Oligodendrocyte    Inhibitory           329 ABC NN                     0.1
                       Pericyte           Inhibitory           Pericyte                       0.1
                       OPC                Inhibitory           Glutamatergic                  0.1
                       Neuroepithelial    Inhibitory           330 VLMC NN                    0.1
                       Excitatory         Inhibitory           Endothelial                    0.1
                       Inhibitory         Excitatory           Glutamatergic                  0.1
                                          Inhibitory           330 VLMC NN                    0.1
                       Microglial         Inhibitory           Oligodendrocyte                0.1
                       Astrocyte          Inhibitory           Oligodendrocyte                0.0
                                                               OPC                            0.0
                                                               329 ABC NN                     0.0
                                                               316 Bergmann NN                0.0
                                                               Astrocyte                      0.0
                                                               330 VLMC NN                    0.0
                                                               Endothelial                    0.0
                                          Excitatory           Glutamatergic                  0.0
                                                               Heterogeneous                  0.0
                       Excitatory         Inhibitory           OPC                            0.0
                                                               Dopaminergic                   0.0
                       Endothelial        Inhibitory           329 ABC NN                     0.0
                       Microglial         Inhibitory           329 ABC NN                     0.0
                       Inhibitory         Inhibitory           Pericyte                       0.0
                                                               329 ABC NN                     0.0
                                                               Oligodendrocyte                0.0
                                                               Endothelial                    0.0
                       Excitatory         Inhibitory           Microglia                      0.0
                                                               Oligodendrocyte                0.0
                                                               329 ABC NN                     0.0
                                                               Pericyte                       0.0
                                          Excitatory           Gabaergic                      0.0
                                                               330 VLMC NN                    0.0
                       Inhibitory         Inhibitory           Inhibitory                     0.0
                                                               OPC                            0.0
                       Microglial         Inhibitory           Pericyte                       0.0
                       Oligodendrocyte    Inhibitory           316 Bergmann NN                0.0
                                                               Dopaminergic                   0.0
                       OPC                Inhibitory           Pericyte                       0.0
                                                               Endothelial                    0.0
                       Neuroepithelial    Inhibitory           325 CHOR NN                    0.0
                       Microglial         Excitatory           Glutamatergic                  0.0
                       OPC                Inhibitory           316 Bergmann NN                0.0
                       Oligodendrocyte    Inhibitory           Astrocyte                      0.0
                                                               Pericyte                       0.0
                                                               OPC                            0.0
                                                               325 CHOR NN                    0.0
                                                               Endothelial                    0.0
                                          Excitatory           Heterogeneous                  0.0
                       Pericyte           Inhibitory           Gabaergic                      0.0
                                                               Endothelial                    0.0
Oligodendrocyte        Oligodendrocyte    Oligodendrocyte      Oligodendrocyte               98.5
                                                               Glutamatergic                  0.8
                                                               Heterogeneous                  0.4
                                                               Gabaergic                      0.1
                       Inhibitory         Oligodendrocyte      Oligodendrocyte                0.1
                       Oligodendrocyte    Oligodendrocyte      Endothelial                    0.1
                       OPC                Oligodendrocyte      Oligodendrocyte                0.0
                       Excitatory         Oligodendrocyte      Heterogeneous                  0.0
                                                               Oligodendrocyte                0.0
                                                               Glutamatergic                  0.0
                       Astrocyte          Oligodendrocyte      Heterogeneous                  0.0
                       Oligodendrocyte    Oligodendrocyte      329 ABC NN                     0.0
                                                               330 VLMC NN                    0.0
                       Pericyte           Oligodendrocyte      Oligodendrocyte                0.0
Neuron                 Excitatory         Excitatory           Glutamatergic                 57.6
                       Inhibitory         Excitatory           Gabaergic                     29.2
                                          Inhibitory           Gabaergic                      4.1
                       Excitatory         Excitatory           Heterogeneous                  3.9
                       Inhibitory         Inhibitory           Heterogeneous                  1.2
                       Neuroepithelial    Excitatory           325 CHOR NN                    0.9
                       Oligodendrocyte    Excitatory           Glutamatergic                  0.8
                                                               Heterogeneous                  0.5
                       OPC                Excitatory           Glutamatergic                  0.3
                       Inhibitory         Excitatory           Glutamatergic                  0.3
                       Excitatory         Excitatory           Gabaergic                      0.2
                                          Inhibitory           Gabaergic                      0.2
                       Oligodendrocyte    Excitatory           325 CHOR NN                    0.2
                       Inhibitory         Excitatory           Heterogeneous                  0.2
                       Oligodendrocyte    Inhibitory           Gabaergic                      0.1
                       Astrocyte          Excitatory           Glutamatergic                  0.1
                                                               325 CHOR NN                    0.1
                       Endothelial        Excitatory           325 CHOR NN                    0.1
                       Oligodendrocyte    Excitatory           Gabaergic                      0.1
                       Excitatory         Inhibitory           Heterogeneous                  0.1
                       Astrocyte          Inhibitory           Gabaergic                      0.0
                       Inhibitory         Excitatory           Inhibitory                     0.0
                       Astrocyte          Excitatory           Heterogeneous                  0.0
                       Excitatory         Excitatory           325 CHOR NN                    0.0
                                                               Pericyte                       0.0
                       Astrocyte          Excitatory           Gabaergic                      0.0
                       Microglial         Excitatory           Glutamatergic                  0.0
                                                               325 CHOR NN                    0.0
                       Inhibitory         Excitatory           329 ABC NN                     0.0
                                                               Pericyte                       0.0
                                                               325 CHOR NN                    0.0
                       Oligodendrocyte    Inhibitory           Heterogeneous                  0.0
                                          Excitatory           Oligodendrocyte                0.0
                       Pericyte           Excitatory           Glutamatergic                  0.0
Astrocyte              Astrocyte          Astrocyte            Astrocyte                     87.9
                                                               Heterogeneous                  6.7
                                                               316 Bergmann NN                2.0
                       Inhibitory         Astrocyte            Inhibitory                     1.6
                       Oligodendrocyte    Astrocyte            Heterogeneous                  0.5
                       Astrocyte          Astrocyte            Inhibitory                     0.2
                                                               Endothelial                    0.2
                       Excitatory         Astrocyte            Heterogeneous                  0.2
                                                               Inhibitory                     0.1
                       Astrocyte          Astrocyte            Pericyte                       0.1
                       Oligodendrocyte    Astrocyte            316 Bergmann NN                0.1
                                                               Astrocyte                      0.1
                       Astrocyte          Astrocyte            Glutamatergic                  0.0
                                                               330 VLMC NN                    0.0
                                                               Gabaergic                      0.0
                       Microglial         Astrocyte            316 Bergmann NN                0.0
                       Inhibitory         Astrocyte            Astrocyte                      0.0
                                                               Heterogeneous                  0.0
                       OPC                Astrocyte            Astrocyte                      0.0
                       Neuroepithelial    Astrocyte            Heterogeneous                  0.0
                       Microglial         Astrocyte            Heterogeneous                  0.0
                       Oligodendrocyte    Astrocyte            Inhibitory                     0.0
                                                               Glutamatergic                  0.0
                                                               Endothelial                    0.0
                       Pericyte           Astrocyte            Inhibitory                     0.0
                                                               Heterogeneous                  0.0
Name: proportion, dtype: float64
No description has been provided for this image

OSD-612-Specific¶

Clean variable names & keys

In [41]:
self.rna.obs.loc[:, "Age_End"] = self.rna.obs[
    "Characteristics[Age at Euthanasia]"].astype(str) + " Weeks"
self.rna.obs.loc[:, "Condition"] = self.rna.obs["Factor Value[Spaceflight]"]
if "cellmap_class_name" in self.rna.obs:  # cell name w/o region cues
    self.rna.obs.loc[:, "cellmap_class_name_collapsed"] = self.rna.obs[
        "cellmap_class_name"].apply(lambda x: x + "***" + str(
            "Glutamatergic-" if ("glut" in x.lower()) else "") + str(
                "GABAergic-" if "gaba" in x.lower() else "") + str(
                    "Oligodendrocyte-" if "oligo" in x.lower() else "") + str(
                        "Cholinergic-" if "chol" in x.lower() else ""))
    self.rna.obs.loc[:, "cellmap_class_name_collapsed"] = self.rna.obs[
        "cellmap_class_name_collapsed"].apply(
            lambda x: x.split("***")[0 if x.split("***")[1] == "" else 1])
    self.rna.obs.loc[:, "cellmap_class_name_collapsed"] = self.rna.obs[
        "cellmap_class_name_collapsed"].apply(
            lambda x: x[:-1] if x[-1] == "-" else x)

Final Write¶

In [42]:
# Descriptives
if all((i in self.rna.obs for i in [
        "Factor Value[Age]", "Characteristics[Age at Euthanasia]"])):
    print(self.rna.obs[["Factor Value[Age]",
                        "Characteristics[Age at Euthanasia]"]].value_counts())

# Write h5ad
# self.rna.X = self.rna.layers["counts"].copy()
if overwrite is True or not os.path.exists(file_new):
    print("\n\n", f"Writing file to {file_new}...")
    self.rna.write_h5ad(file_new)

## Write Version Compatible with Older Packages
# adata = self.rna.copy()
# adata.uns = {}
# # adata.write_h5ad(os.path.splitext(file_new)[0] + "_compatible.h5ad")

# Send Email with Output When Done
if email is not None and html_out is not None:
    os.system(f"jupyter nbconvert --to html {cur_file}")
    os.system(f"echo 'yay' | mutt -s 'JOB DONE' -a {html_out} -- {email}")
... storing 'Age_End' as categorical

 Writing file to data/OSD-612_integrated.h5ad...

SCRATCH (IGNORE)¶

In [ ]:
# mks_c = dict(zip(markers_predefined, [markers_predefined[x].intersection(
#         self.rna.var_names) for x in markers_predefined]))
# mks_c["Inhibitory"] = mks_c["Inhibitory"].difference(mks_c["Excitatory"])
# mks_c["Excitatory"] = mks_c["Excitatory"].difference(mks_c["Inhibitory"])
# endo = list(mks_c["Endothelial"].difference(set(functools.reduce(
#     lambda i, j: i + j, [list(
#         mks_c[i]) for i in mks_c if i != "Endothelial"]))))
# _ = self.plot(genes=endo, figsize=(15, 15),
#               col_celltype="annotation_scanvi",
#               # col_celltype="annotation_by_overlap",
#               layer="scaled", standard_scale="var", kind="matrix")
if email is not None and html_out is not None:
    os.system(f"jupyter nbconvert --to html {cur_file}")
    os.system(f"echo 'yay' | mutt -s 'JOB DONE' -a {html_out} -- {email}")